1use std::collections::{HashMap, HashSet};
2
3use super::graph_index::ProjectIndex;
4
5use super::neural::attention_learned::LearnedAttention;
6
/// Relevance verdict for a single file of the project.
#[derive(Clone, Debug)]
pub struct RelevanceScore {
    /// Path of the file, as it is keyed in the project index.
    pub path: String,
    /// Combined relevance score; higher means more relevant to the task.
    pub score: f64,
    /// Label describing how much of the file should be read for this score.
    pub recommended_mode: &'static str,
}
13
14pub fn compute_relevance(
15 index: &ProjectIndex,
16 task_files: &[String],
17 task_keywords: &[String],
18) -> Vec<RelevanceScore> {
19 let adj = build_adjacency_resolved(index);
20 let all_nodes: Vec<String> = index.files.keys().cloned().collect();
21 if all_nodes.is_empty() {
22 return Vec::new();
23 }
24
25 let node_idx: HashMap<&str, usize> = all_nodes
26 .iter()
27 .enumerate()
28 .map(|(i, n)| (n.as_str(), i))
29 .collect();
30 let n = all_nodes.len();
31
32 let degrees: Vec<f64> = all_nodes
34 .iter()
35 .map(|node| {
36 adj.get(node)
37 .map_or(0.0, |neigh| neigh.len() as f64)
38 .max(1.0)
39 })
40 .collect();
41
42 let mut heat: Vec<f64> = vec![0.0; n];
44 for f in task_files {
45 if let Some(&idx) = node_idx.get(f.as_str()) {
46 heat[idx] = 1.0;
47 }
48 }
49
50 let alpha = 0.5;
53 let iterations = 4;
54 for _ in 0..iterations {
55 let mut new_heat = vec![0.0; n];
56 for (i, node) in all_nodes.iter().enumerate() {
57 let self_term = (1.0 - alpha) * heat[i];
58 let mut neighbor_sum = 0.0;
59 if let Some(neighbors) = adj.get(node) {
60 for neighbor in neighbors {
61 if let Some(&j) = node_idx.get(neighbor.as_str()) {
62 neighbor_sum += heat[j] / degrees[j];
63 }
64 }
65 }
66 new_heat[i] = self_term + alpha * neighbor_sum;
67 }
68 heat = new_heat;
69 }
70
71 let mut pagerank = vec![1.0 / n as f64; n];
73 let damping = 0.85;
74 for _ in 0..8 {
75 let mut new_pr = vec![(1.0 - damping) / n as f64; n];
76 for (i, node) in all_nodes.iter().enumerate() {
77 if let Some(neighbors) = adj.get(node) {
78 let out_deg = neighbors.len().max(1) as f64;
79 for neighbor in neighbors {
80 if let Some(&j) = node_idx.get(neighbor.as_str()) {
81 new_pr[j] += damping * pagerank[i] / out_deg;
82 }
83 }
84 }
85 }
86 pagerank = new_pr;
87 }
88
89 let mut scores: HashMap<String, f64> = HashMap::new();
91 let heat_max = heat.iter().copied().fold(0.0_f64, f64::max).max(1e-10);
92 let pr_max = pagerank.iter().copied().fold(0.0_f64, f64::max).max(1e-10);
93
94 for (i, node) in all_nodes.iter().enumerate() {
95 let h = heat[i] / heat_max;
96 let pr = pagerank[i] / pr_max;
97 let combined = h * 0.8 + pr * 0.2;
98 if combined > 0.01 {
99 scores.insert(node.clone(), combined);
100 }
101 }
102
103 if !task_keywords.is_empty() {
105 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
106 for (file_path, file_entry) in &index.files {
107 let path_lower = file_path.to_lowercase();
108 let mut keyword_hits = 0;
109 for kw in &kw_lower {
110 if path_lower.contains(kw) {
111 keyword_hits += 1;
112 }
113 for export in &file_entry.exports {
114 if export.to_lowercase().contains(kw) {
115 keyword_hits += 1;
116 }
117 }
118 }
119 if keyword_hits > 0 {
120 let boost = (keyword_hits as f64 * 0.15).min(0.6);
121 let entry = scores.entry(file_path.clone()).or_insert(0.0);
122 *entry = (*entry + boost).min(1.0);
123 }
124 }
125 }
126
127 let mut result: Vec<RelevanceScore> = scores
128 .into_iter()
129 .map(|(path, score)| {
130 let mode = recommend_mode(score);
131 RelevanceScore {
132 path,
133 score,
134 recommended_mode: mode,
135 }
136 })
137 .collect();
138
139 result.sort_by(|a, b| {
140 b.score
141 .partial_cmp(&a.score)
142 .unwrap_or(std::cmp::Ordering::Equal)
143 });
144 result
145}
146
147pub fn compute_relevance_from_intent(
148 index: &ProjectIndex,
149 intent: &super::intent_engine::StructuredIntent,
150) -> Vec<RelevanceScore> {
151 use super::intent_engine::IntentScope;
152
153 let mut file_seeds: Vec<String> = Vec::new();
154 let mut extra_keywords: Vec<String> = intent.keywords.clone();
155
156 for target in &intent.targets {
157 if target.contains('.') || target.contains('/') {
158 let matched = resolve_target_to_files(index, target);
159 if matched.is_empty() {
160 extra_keywords.push(target.clone());
161 } else {
162 file_seeds.extend(matched);
163 }
164 } else {
165 let from_symbol = resolve_symbol_to_files(index, target);
166 if from_symbol.is_empty() {
167 extra_keywords.push(target.clone());
168 } else {
169 file_seeds.extend(from_symbol);
170 }
171 }
172 }
173
174 if let Some(lang) = &intent.language_hint {
175 let lang_ext = match lang.as_str() {
176 "rust" => Some("rs"),
177 "typescript" => Some("ts"),
178 "javascript" => Some("js"),
179 "python" => Some("py"),
180 "go" => Some("go"),
181 "ruby" => Some("rb"),
182 "java" => Some("java"),
183 _ => None,
184 };
185 if let Some(ext) = lang_ext {
186 if file_seeds.is_empty() {
187 for path in index.files.keys() {
188 if path.ends_with(&format!(".{ext}")) {
189 extra_keywords.push(
190 std::path::Path::new(path)
191 .file_stem()
192 .and_then(|s| s.to_str())
193 .unwrap_or("")
194 .to_string(),
195 );
196 break;
197 }
198 }
199 }
200 }
201 }
202
203 let mut result = compute_relevance(index, &file_seeds, &extra_keywords);
204
205 match intent.scope {
206 IntentScope::SingleFile => {
207 result.truncate(5);
208 }
209 IntentScope::MultiFile => {
210 result.truncate(15);
211 }
212 IntentScope::CrossModule | IntentScope::ProjectWide => {}
213 }
214
215 result
216}
217
218fn resolve_target_to_files(index: &ProjectIndex, target: &str) -> Vec<String> {
219 let mut matches = Vec::new();
220 for path in index.files.keys() {
221 if path.ends_with(target) || path.contains(target) {
222 matches.push(path.clone());
223 }
224 }
225 matches
226}
227
228fn resolve_symbol_to_files(index: &ProjectIndex, symbol: &str) -> Vec<String> {
229 let sym_lower = symbol.to_lowercase();
230 let mut matches = Vec::new();
231 for entry in index.symbols.values() {
232 let name_lower = entry.name.to_lowercase();
233 if (name_lower == sym_lower || name_lower.contains(&sym_lower))
234 && !matches.contains(&entry.file)
235 {
236 matches.push(entry.file.clone());
237 }
238 }
239 if matches.is_empty() {
240 for (path, file_entry) in &index.files {
241 if file_entry
242 .exports
243 .iter()
244 .any(|e| e.to_lowercase().contains(&sym_lower))
245 && !matches.contains(path)
246 {
247 matches.push(path.clone());
248 }
249 }
250 }
251 matches
252}
253
/// Maps a relevance score to a reading-mode label.
///
/// Thresholds are inclusive: `>= 0.8` is `"full"`, `>= 0.5` is `"signatures"`,
/// `>= 0.2` is `"map"`, and everything else (including NaN) is `"reference"`.
fn recommend_mode(score: f64) -> &'static str {
    match score {
        s if s >= 0.8 => "full",
        s if s >= 0.5 => "signatures",
        s if s >= 0.2 => "map",
        _ => "reference",
    }
}
265
266fn build_adjacency_resolved(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
271 let module_to_file = build_module_map(index);
272 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
273
274 for edge in &index.edges {
275 let from = &edge.from;
276 let to_resolved = module_to_file
277 .get(&edge.to)
278 .cloned()
279 .unwrap_or_else(|| edge.to.clone());
280
281 if index.files.contains_key(from) && index.files.contains_key(&to_resolved) {
282 adj.entry(from.clone())
283 .or_default()
284 .push(to_resolved.clone());
285 adj.entry(to_resolved).or_default().push(from.clone());
286 }
287 }
288 adj
289}
290
291fn build_module_map(index: &ProjectIndex) -> HashMap<String, String> {
294 let file_paths: Vec<&str> = index
295 .files
296 .keys()
297 .map(std::string::String::as_str)
298 .collect();
299 let mut mapping: HashMap<String, String> = HashMap::new();
300
301 let edge_targets: HashSet<String> = index.edges.iter().map(|e| e.to.clone()).collect();
302
303 for target in &edge_targets {
304 if index.files.contains_key(target) {
305 mapping.insert(target.clone(), target.clone());
306 continue;
307 }
308
309 if let Some(resolved) = resolve_module_to_file(target, &file_paths) {
310 mapping.insert(target.clone(), resolved);
311 }
312 }
313
314 mapping
315}
316
/// Resolves a `::`-separated module path to one of `file_paths`.
///
/// Leading `crate::` / `super::` prefixes are ignored. Starting with the full path and
/// dropping trailing segments one at a time, the segments are joined with `/` and a file
/// matches when it is `<candidate>.rs` or `<candidate>/mod.rs`, either relative to a
/// leading `rust/src/` / `src/` or as a path suffix starting at a `/`.
///
/// If nothing matches, the last segment alone is tried as a file name (`<last>.rs`).
/// That fallback only matches a whole file name, so `io` does not resolve to
/// `audio.rs`. An empty module path never resolves.
///
/// Returns the first matching entry of `file_paths`, or `None`.
fn resolve_module_to_file(module_path: &str, file_paths: &[&str]) -> Option<String> {
    let cleaned = module_path
        .trim_start_matches("crate::")
        .trim_start_matches("super::");
    if cleaned.is_empty() {
        // Without this guard the fallback below would look for ".rs" and match any file.
        return None;
    }

    let parts: Vec<&str> = cleaned.split("::").collect();

    // Longest candidate first, so the most specific file wins.
    for end in (1..=parts.len()).rev() {
        let candidate = parts[..end].join("/");
        // Built once per candidate instead of once per file.
        let as_file = format!("{candidate}.rs");
        let as_dir_module = format!("{candidate}/mod.rs");
        let nested_file = format!("/{as_file}");
        let nested_dir_module = format!("/{as_dir_module}");

        for fp in file_paths {
            let fp_normalized = fp
                .trim_start_matches("rust/src/")
                .trim_start_matches("src/");

            if fp_normalized == as_file
                || fp_normalized == as_dir_module
                || fp.ends_with(&nested_file)
                || fp.ends_with(&nested_dir_module)
            {
                return Some(fp.to_string());
            }
        }
    }

    // Fallback: the last segment as a file name anywhere in the tree.
    if let Some(last) = parts.last().filter(|segment| !segment.is_empty()) {
        let file_name = format!("{last}.rs");
        let nested = format!("/{file_name}");
        for fp in file_paths {
            if *fp == file_name || fp.ends_with(&nested) {
                return Some(fp.to_string());
            }
        }
    }

    None
}
357
/// Splits a free-text task description into file-path hints and keywords.
///
/// Each whitespace-separated word is stripped of surrounding punctuation. Characters
/// that can be part of a path (`.`, `/`, `_`, `-`) are kept, except that trailing dots
/// are removed so a path or word at the end of a sentence ("... in src/auth.rs.") is
/// not polluted by the full stop.
///
/// A word is reported as a file when it contains a `.` and either contains a `/` or has
/// one of the extensions `rs`, `ts`, `py`, `go`, `js` (ASCII case-insensitive). Any
/// other word of at least three bytes that is not in [`STOP_WORDS`] is reported as a
/// keyword, in its original casing.
///
/// Returns `(files, keywords)`, both in order of appearance.
pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
    // Punctuation that is never part of a path or identifier.
    fn is_edge_noise(c: char) -> bool {
        !c.is_alphanumeric() && c != '.' && c != '/' && c != '_' && c != '-'
    }

    let mut files = Vec::new();
    let mut keywords = Vec::new();

    for word in task_description.split_whitespace() {
        // Leading dots are kept ("./src", ".gitignore"); trailing dots are sentence
        // punctuation and would otherwise also shield characters such as `)` before them.
        let clean = word
            .trim_start_matches(is_edge_noise)
            .trim_end_matches(|c: char| c == '.' || is_edge_noise(c));

        let looks_like_file = clean.contains('.')
            && (clean.contains('/')
                || std::path::Path::new(clean).extension().is_some_and(|ext| {
                    ["rs", "ts", "py", "go", "js"]
                        .iter()
                        .any(|&known| ext.eq_ignore_ascii_case(known))
                }));

        if looks_like_file {
            files.push(clean.to_string());
        } else if clean.len() >= 3 && !STOP_WORDS.contains(&clean.to_lowercase().as_str()) {
            keywords.push(clean.to_string());
        }
    }

    (files, keywords)
}

/// Lower-case words that are never reported as keywords by [`parse_task_hints`].
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
395
/// Per-category weights used to score a source line by its structural role.
struct StructuralWeights {
    /// Weight of lines recognised as error handling.
    error_handling: f64,
    /// Weight of lines that start a definition.
    definition: f64,
    /// Weight of control-flow lines.
    control_flow: f64,
    /// Weight of lines that consist only of a closing brace.
    closing_brace: f64,
    /// Weight of every line that falls into none of the categories above.
    other: f64,
}
403
404impl StructuralWeights {
405 const DEFAULT: Self = Self {
406 error_handling: 1.5,
407 definition: 1.0,
408 control_flow: 0.5,
409 closing_brace: 0.15,
410 other: 0.3,
411 };
412
413 fn for_task_type(task_type: Option<super::intent_engine::TaskType>) -> Self {
414 use super::intent_engine::TaskType;
415 match task_type {
416 Some(TaskType::FixBug) => Self {
417 error_handling: 2.0,
418 definition: 0.8,
419 control_flow: 0.8,
420 closing_brace: 0.1,
421 other: 0.2,
422 },
423 Some(TaskType::Debug) => Self {
424 error_handling: 2.0,
425 definition: 0.6,
426 control_flow: 1.0,
427 closing_brace: 0.1,
428 other: 0.2,
429 },
430 Some(TaskType::Generate) => Self {
431 error_handling: 0.8,
432 definition: 1.5,
433 control_flow: 0.3,
434 closing_brace: 0.15,
435 other: 0.4,
436 },
437 Some(TaskType::Refactor) => Self {
438 error_handling: 1.0,
439 definition: 1.5,
440 control_flow: 0.6,
441 closing_brace: 0.2,
442 other: 0.3,
443 },
444 Some(TaskType::Test) => Self {
445 error_handling: 1.2,
446 definition: 1.3,
447 control_flow: 0.4,
448 closing_brace: 0.15,
449 other: 0.3,
450 },
451 Some(TaskType::Review) => Self {
452 error_handling: 1.3,
453 definition: 1.2,
454 control_flow: 0.6,
455 closing_brace: 0.15,
456 other: 0.3,
457 },
458 None | Some(TaskType::Explore | _) => Self::DEFAULT,
459 }
460 }
461}
462
463pub fn information_bottleneck_filter(
474 content: &str,
475 task_keywords: &[String],
476 budget_ratio: f64,
477) -> String {
478 information_bottleneck_filter_typed(content, task_keywords, budget_ratio, None)
479}
480
481pub fn information_bottleneck_filter_typed(
483 content: &str,
484 task_keywords: &[String],
485 budget_ratio: f64,
486 task_type: Option<super::intent_engine::TaskType>,
487) -> String {
488 let lines: Vec<&str> = content.lines().collect();
489 if lines.is_empty() {
490 return String::new();
491 }
492
493 let n = lines.len();
494 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
495 let attention = LearnedAttention::with_defaults();
496
497 let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
498 for line in &lines {
499 for token in line.split_whitespace() {
500 *global_token_freq.entry(token).or_insert(0) += 1;
501 }
502 }
503 let total_unique = global_token_freq.len().max(1) as f64;
504 let total_lines = n.max(1) as f64;
505
506 let task_token_set: HashSet<String> = kw_lower
507 .iter()
508 .flat_map(|kw| kw.split(|c: char| !c.is_alphanumeric()).map(String::from))
509 .filter(|t| t.len() >= 2)
510 .collect();
511
512 let effective_ratio = if task_token_set.is_empty() {
513 budget_ratio
514 } else {
515 adaptive_ib_budget(content, budget_ratio)
516 };
517
518 let weights = StructuralWeights::for_task_type(task_type);
519
520 let mut scored_lines: Vec<(usize, &str, f64)> = lines
521 .iter()
522 .enumerate()
523 .map(|(i, line)| {
524 let trimmed = line.trim();
525 if trimmed.is_empty() {
526 return (i, *line, 0.05);
527 }
528
529 let line_lower = trimmed.to_lowercase();
530 let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
531 let line_token_count = line_tokens.len().max(1) as f64;
532
533 let mi_score = if task_token_set.is_empty() {
534 0.0
535 } else {
536 let line_token_set: HashSet<String> =
537 line_tokens.iter().map(|t| t.to_lowercase()).collect();
538 let overlap: f64 = line_token_set
539 .iter()
540 .filter(|t| task_token_set.iter().any(|kw| t.contains(kw.as_str())))
541 .map(|t| {
542 let freq = *global_token_freq.get(t.as_str()).unwrap_or(&1) as f64;
543 (total_lines / freq).ln().max(0.1)
544 })
545 .sum();
546 overlap / line_token_count
547 };
548
549 let keyword_hits: f64 = kw_lower
550 .iter()
551 .filter(|kw| line_lower.contains(kw.as_str()))
552 .count() as f64;
553
554 let structural = if is_error_handling(trimmed) {
555 weights.error_handling
556 } else if is_definition_line(trimmed) {
557 weights.definition
558 } else if is_control_flow(trimmed) {
559 weights.control_flow
560 } else if is_closing_brace(trimmed) {
561 weights.closing_brace
562 } else {
563 weights.other
564 };
565 let relevance = mi_score * 0.4 + keyword_hits * 0.3 + structural;
566
567 let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
568 let token_diversity = unique_in_line / line_token_count;
569
570 let avg_idf: f64 = if line_tokens.is_empty() {
571 0.0
572 } else {
573 line_tokens
574 .iter()
575 .map(|t| {
576 let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
577 (total_unique / freq).ln().max(0.0)
578 })
579 .sum::<f64>()
580 / line_token_count
581 };
582 let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);
583
584 let pos = i as f64 / n.max(1) as f64;
585 let attn_weight = attention.weight(pos);
586
587 let score = (relevance * 0.6 + 0.05)
588 * (information * 0.25 + 0.05)
589 * (attn_weight * 0.15 + 0.05);
590
591 (i, *line, score)
592 })
593 .collect();
594
595 let budget = ((n as f64) * effective_ratio).ceil() as usize;
596
597 scored_lines.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
598
599 let selected = mmr_select(&scored_lines, budget, 0.3);
600
601 let mut output_lines: Vec<&str> = Vec::with_capacity(budget + 1);
602
603 if !kw_lower.is_empty() {
604 output_lines.push("");
605 }
606
607 for (_, line, _) in &selected {
608 output_lines.push(line);
609 }
610
611 if !kw_lower.is_empty() {
612 let summary = format!("[task: {}]", task_keywords.join(", "));
613 let mut result = summary;
614 result.push('\n');
615 result.push_str(&output_lines[1..].to_vec().join("\n"));
616 return result;
617 }
618
619 output_lines.join("\n")
620}
621
/// Greedily picks up to `budget` candidates, trading score against redundancy.
///
/// `candidates` are `(line index, line text, score)` tuples. The first candidate is
/// always taken, so callers are expected to pass the list sorted by descending score.
/// Each further round takes the remaining candidate with the highest value of
/// `score - lambda * max_similarity`, where similarity is the Jaccard overlap of
/// whitespace-separated tokens with any line already selected. Candidates without
/// tokens compete with their plain score. On ties the earlier candidate wins.
///
/// Returns the picked tuples in selection order. The result is empty when `candidates`
/// is empty or `budget` is 0, and never longer than `candidates`.
fn mmr_select<'a>(
    candidates: &[(usize, &'a str, f64)],
    budget: usize,
    lambda: f64,
) -> Vec<(usize, &'a str, f64)> {
    if candidates.is_empty() || budget == 0 {
        return Vec::new();
    }

    // Tokenise every line once instead of once per comparison.
    let token_sets: Vec<HashSet<&str>> = candidates
        .iter()
        .map(|&(_, line, _)| line.split_whitespace().collect())
        .collect();

    // Highest similarity of each candidate to anything selected so far. Only the most
    // recently selected line can raise it, so it is updated incrementally per round.
    let mut max_sim = vec![0.0_f64; candidates.len()];

    // Indices of not-yet-selected candidates, kept in their original order so that the
    // tie-breaking ("first best wins") matches a scan over the original list.
    let mut remaining: Vec<usize> = (1..candidates.len()).collect();

    // Capacity is bounded by the candidate count: a huge `budget` must not translate
    // into a huge (or overflowing) allocation request.
    let mut selected: Vec<(usize, &'a str, f64)> =
        Vec::with_capacity(budget.min(candidates.len()));

    let mut last_picked = 0;
    selected.push(candidates[last_picked]);

    while selected.len() < budget && !remaining.is_empty() {
        let picked_tokens = &token_sets[last_picked];
        let mut best_pos = 0;
        let mut best_mmr = f64::NEG_INFINITY;

        for (pos, &cand) in remaining.iter().enumerate() {
            let cand_tokens = &token_sets[cand];
            let cand_score = candidates[cand].2;

            let mmr = if cand_tokens.is_empty() {
                // Nothing to compare: the line competes with its plain score.
                cand_score
            } else {
                if !picked_tokens.is_empty() {
                    let inter = cand_tokens.intersection(picked_tokens).count();
                    // |A ∪ B| = |A| + |B| - |A ∩ B|; non-zero because both sets are non-empty.
                    let union = cand_tokens.len() + picked_tokens.len() - inter;
                    max_sim[cand] = max_sim[cand].max(inter as f64 / union as f64);
                }
                cand_score - lambda * max_sim[cand]
            };

            if mmr > best_mmr {
                best_mmr = mmr;
                best_pos = pos;
            }
        }

        last_picked = remaining.remove(best_pos);
        selected.push(candidates[last_picked]);
    }

    selected
}
684
/// Reports whether `line` looks like error handling in one of several languages.
///
/// The check is purely textual: the line must start with one of a set of statement
/// prefixes, contain one of a set of fragments, or contain `?;` without being a `//`
/// comment. Prefixes are matched against `line` as given, so callers should pass a
/// line without leading whitespace.
fn is_error_handling(line: &str) -> bool {
    const STATEMENT_PREFIXES: &[&str] = &[
        "return Err(",
        "Err(",
        "bail!(",
        "anyhow::bail!",
        "raise ",
        "throw ",
        "catch ",
        "except ",
        "try ",
        "panic!(",
    ];
    const FRAGMENTS: &[&str] = &[".map_err(", "unwrap()", "expect(\"", "Error::", "error!"];

    if STATEMENT_PREFIXES
        .iter()
        .any(|&prefix| line.starts_with(prefix))
    {
        return true;
    }
    if FRAGMENTS.iter().any(|&fragment| line.contains(fragment)) {
        return true;
    }

    // The `?` operator at the end of a statement, unless the line is a line comment.
    line.contains("?;") && !line.starts_with("//")
}
703
/// Adjusts a line-budget ratio to how repetitive `content` is.
///
/// Content with fewer than 10 lines always gets a ratio of 1.0. Content without any
/// tokens gets `base_ratio` unchanged. Otherwise the ratio is reduced by up to 30% in
/// proportion to the share of repeated whitespace-separated tokens, and the result is
/// clamped to `[0.2, 1.0]`.
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    if content.lines().count() < 10 {
        return 1.0;
    }

    // Line breaks are whitespace too, so tokenising the whole text yields exactly the
    // tokens of all lines.
    let mut distinct_tokens: HashSet<&str> = HashSet::new();
    let mut token_count = 0usize;
    for token in content.split_whitespace() {
        distinct_tokens.insert(token);
        token_count += 1;
    }

    if token_count == 0 {
        return base_ratio;
    }

    let unique_ratio = distinct_tokens.len() as f64 / token_count as f64;
    let repetition_factor = 1.0 - unique_ratio;
    let scaled = base_ratio * (1.0 - repetition_factor * 0.3);
    scaled.clamp(0.2, 1.0)
}
731
/// Reports whether `line`, ignoring leading whitespace, starts a definition.
///
/// Recognised are the definition keywords listed in `DEFINITION_PREFIXES`, covering
/// Rust, JavaScript/TypeScript, Python and Go style declarations. Each prefix ends with
/// a space, so e.g. `functional` does not count as `function `.
fn is_definition_line(line: &str) -> bool {
    const DEFINITION_PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];

    // No prefix starts with whitespace, so testing the unindented line alone also covers
    // the case where the raw line starts with the prefix.
    let unindented = line.trim_start();
    DEFINITION_PREFIXES
        .iter()
        .any(|&prefix| unindented.starts_with(prefix))
}
766
/// Reports whether `line`, ignoring surrounding whitespace, starts with a control-flow
/// keyword.
///
/// Keywords listed with a trailing space must be followed by a space; `break`,
/// `continue` and `yield` match as plain prefixes.
fn is_control_flow(line: &str) -> bool {
    const CONTROL_FLOW_PREFIXES: &[&str] = &[
        "if ",
        "else ",
        "match ",
        "for ",
        "while ",
        "return ",
        "break",
        "continue",
        "yield",
        "await ",
    ];

    let statement = line.trim();
    CONTROL_FLOW_PREFIXES
        .iter()
        .any(|&keyword| statement.starts_with(keyword))
}
780
/// Reports whether `line`, ignoring surrounding whitespace, is only a block terminator:
/// `}`, `};`, `})` or `});`.
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
785
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned keyword list the filter functions expect.
    fn keywords(words: &[&str]) -> Vec<String> {
        words.iter().map(|w| w.to_string()).collect()
    }

    /// A path-like word ends up in the file list, an ordinary word in the keywords.
    #[test]
    fn parse_task_finds_files_and_keywords() {
        let description = "Fix the authentication bug in src/auth.rs and update tests";
        let (files, keywords) = parse_task_hints(description);

        let found_file = files.iter().any(|f| f.contains("auth.rs"));
        let found_keyword = keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication"));

        assert!(found_file);
        assert!(found_keyword);
    }

    /// One representative score per reading mode.
    #[test]
    fn recommend_mode_by_score() {
        let expectations = [
            (1.0, "full"),
            (0.6, "signatures"),
            (0.3, "map"),
            (0.1, "reference"),
        ];
        for (score, mode) in expectations {
            assert_eq!(recommend_mode(score), mode);
        }
    }

    /// The definition line survives and the task header is emitted.
    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n let x = 42;\n // boring comment\n println!(x);\n}\n";
        let filtered = information_bottleneck_filter(content, &keywords(&["main"]), 0.6);

        assert!(filtered.contains("fn main"), "definitions must be preserved");
        assert!(filtered.contains("[task: main]"), "should have task summary");
    }

    /// Error-handling lines outrank plain statements.
    #[test]
    fn info_bottleneck_error_handling_priority() {
        let content = "fn validate() {\n let data = parse()?;\n return Err(\"invalid\");\n let x = 1;\n let y = 2;\n}\n";
        let filtered = information_bottleneck_filter(content, &keywords(&["validate"]), 0.5);

        assert!(
            filtered.contains("return Err"),
            "error handling should survive filtering"
        );
    }

    /// Output is ordered by score, so a definition precedes a bare closing brace.
    #[test]
    fn info_bottleneck_score_sorted() {
        let content = "fn important() {\n let x = 1;\n let y = 2;\n let z = 3;\n}\n}\n";
        let filtered = information_bottleneck_filter(content, &[], 0.6);

        let mut definition_at = None;
        let mut brace_at = None;
        for (position, line) in filtered.lines().enumerate() {
            if definition_at.is_none() && line.contains("fn important") {
                definition_at = Some(position);
            }
            if brace_at.is_none() && line.trim() == "}" {
                brace_at = Some(position);
            }
        }

        // Only meaningful when both kinds of line made it into the output.
        if let (Some(d), Some(b)) = (definition_at, brace_at) {
            assert!(
                d < b,
                "definitions should appear before closing braces in score-sorted output"
            );
        }
    }

    /// Highly repetitive content is granted a smaller budget than diverse content.
    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse_lines: Vec<String> = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect();
        let diverse = diverse_lines.join("\n");

        let budget_rep = adaptive_ib_budget(&repetitive, 0.7);
        let budget_div = adaptive_ib_budget(&diverse, 0.7);

        assert!(
            budget_rep < budget_div,
            "repetitive content should get lower budget"
        );
    }
}