// lean_ctx/core/task_relevance.rs — task-relevance scoring and information-bottleneck filtering.

1use std::collections::{HashMap, HashSet};
2
3use super::graph_index::ProjectIndex;
4
5use super::neural::attention_learned::LearnedAttention;
6
/// A per-file relevance verdict produced by `compute_relevance`.
#[derive(Debug, Clone)]
pub struct RelevanceScore {
    /// Project-relative file path this score applies to.
    pub path: String,
    /// Combined relevance in [0, 1]: heat diffusion + PageRank + keyword boost.
    pub score: f64,
    /// Suggested context mode from `recommend_mode`:
    /// "full", "signatures", "map", or "reference".
    pub recommended_mode: &'static str,
}
13
/// Score every indexed file for relevance to a task.
///
/// Two graph signals are combined over the resolved import graph:
///   1. heat diffusion seeded at `task_files` (primary signal, weight 0.8)
///   2. PageRank centrality as a "gateway file" bonus (weight 0.2)
/// followed by a keyword boost against file paths and exported names.
///
/// Returns one `RelevanceScore` per file that cleared the 0.01 noise floor,
/// sorted by descending score.
pub fn compute_relevance(
    index: &ProjectIndex,
    task_files: &[String],
    task_keywords: &[String],
) -> Vec<RelevanceScore> {
    let adj = build_adjacency_resolved(index);
    let all_nodes: Vec<String> = index.files.keys().cloned().collect();
    if all_nodes.is_empty() {
        return Vec::new();
    }

    // Position of each path in `all_nodes`, so the heat/pagerank vectors
    // below can be addressed by file.
    let node_idx: HashMap<&str, usize> = all_nodes
        .iter()
        .enumerate()
        .map(|(i, n)| (n.as_str(), i))
        .collect();
    let n = all_nodes.len();

    // Build degree-normalized adjacency for heat diffusion.
    // Degrees are clamped to >= 1 so isolated nodes never divide by zero.
    let degrees: Vec<f64> = all_nodes
        .iter()
        .map(|node| {
            adj.get(node)
                .map_or(0.0, |neigh| neigh.len() as f64)
                .max(1.0)
        })
        .collect();

    // Seed vector: task files get 1.0
    let mut heat: Vec<f64> = vec![0.0; n];
    for f in task_files {
        if let Some(&idx) = node_idx.get(f.as_str()) {
            heat[idx] = 1.0;
        }
    }

    // Heat diffusion: h(t+1) = (1-alpha)*h(t) + alpha * A_norm * h(t)
    // Run for k iterations
    let alpha = 0.5;
    let iterations = 4;
    for _ in 0..iterations {
        let mut new_heat = vec![0.0; n];
        for (i, node) in all_nodes.iter().enumerate() {
            // Fraction of this node's current heat that it keeps.
            let self_term = (1.0 - alpha) * heat[i];
            let mut neighbor_sum = 0.0;
            if let Some(neighbors) = adj.get(node) {
                for neighbor in neighbors {
                    if let Some(&j) = node_idx.get(neighbor.as_str()) {
                        // Each neighbor contributes its heat divided by its
                        // own degree (column-normalized random-walk step).
                        neighbor_sum += heat[j] / degrees[j];
                    }
                }
            }
            new_heat[i] = self_term + alpha * neighbor_sum;
        }
        heat = new_heat;
    }

    // PageRank centrality for gateway detection
    let mut pagerank = vec![1.0 / n as f64; n];
    let damping = 0.85;
    for _ in 0..8 {
        let mut new_pr = vec![(1.0 - damping) / n as f64; n];
        for (i, node) in all_nodes.iter().enumerate() {
            if let Some(neighbors) = adj.get(node) {
                let out_deg = neighbors.len().max(1) as f64;
                for neighbor in neighbors {
                    if let Some(&j) = node_idx.get(neighbor.as_str()) {
                        // Distribute this node's rank evenly over its edges.
                        new_pr[j] += damping * pagerank[i] / out_deg;
                    }
                }
            }
        }
        pagerank = new_pr;
    }

    // Combine: heat (primary) + pagerank centrality (gateway bonus)
    let mut scores: HashMap<String, f64> = HashMap::new();
    // Max-normalize both signals; the 1e-10 floor guards the division when a
    // signal is all zeros (e.g. no task file matched any node).
    let heat_max = heat.iter().cloned().fold(0.0_f64, f64::max).max(1e-10);
    let pr_max = pagerank.iter().cloned().fold(0.0_f64, f64::max).max(1e-10);

    for (i, node) in all_nodes.iter().enumerate() {
        let h = heat[i] / heat_max;
        let pr = pagerank[i] / pr_max;
        let combined = h * 0.8 + pr * 0.2;
        // Drop near-zero scores to keep the result set small.
        if combined > 0.01 {
            scores.insert(node.clone(), combined);
        }
    }

    // Keyword boost: +0.15 per hit on the path or an exported name,
    // capped at +0.6 per file; the total score is clamped to 1.0.
    if !task_keywords.is_empty() {
        let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
        for (file_path, file_entry) in &index.files {
            let path_lower = file_path.to_lowercase();
            let mut keyword_hits = 0;
            for kw in &kw_lower {
                if path_lower.contains(kw) {
                    keyword_hits += 1;
                }
                for export in &file_entry.exports {
                    if export.to_lowercase().contains(kw) {
                        keyword_hits += 1;
                    }
                }
            }
            if keyword_hits > 0 {
                let boost = (keyword_hits as f64 * 0.15).min(0.6);
                // Files invisible to the graph can still enter via keywords.
                let entry = scores.entry(file_path.clone()).or_insert(0.0);
                *entry = (*entry + boost).min(1.0);
            }
        }
    }

    let mut result: Vec<RelevanceScore> = scores
        .into_iter()
        .map(|(path, score)| {
            let mode = recommend_mode(score);
            RelevanceScore {
                path,
                score,
                recommended_mode: mode,
            }
        })
        .collect();

    // Sort descending by score; partial_cmp fallback keeps NaN from panicking.
    result.sort_by(|a, b| {
        b.score
            .partial_cmp(&a.score)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    result
}
146
147pub fn compute_relevance_from_intent(
148    index: &ProjectIndex,
149    intent: &super::intent_engine::StructuredIntent,
150) -> Vec<RelevanceScore> {
151    use super::intent_engine::IntentScope;
152
153    let mut file_seeds: Vec<String> = Vec::new();
154    let mut extra_keywords: Vec<String> = intent.keywords.clone();
155
156    for target in &intent.targets {
157        if target.contains('.') || target.contains('/') {
158            let matched = resolve_target_to_files(index, target);
159            if matched.is_empty() {
160                extra_keywords.push(target.clone());
161            } else {
162                file_seeds.extend(matched);
163            }
164        } else {
165            let from_symbol = resolve_symbol_to_files(index, target);
166            if from_symbol.is_empty() {
167                extra_keywords.push(target.clone());
168            } else {
169                file_seeds.extend(from_symbol);
170            }
171        }
172    }
173
174    if let Some(lang) = &intent.language_hint {
175        let lang_ext = match lang.as_str() {
176            "rust" => Some("rs"),
177            "typescript" => Some("ts"),
178            "javascript" => Some("js"),
179            "python" => Some("py"),
180            "go" => Some("go"),
181            "ruby" => Some("rb"),
182            "java" => Some("java"),
183            _ => None,
184        };
185        if let Some(ext) = lang_ext {
186            if file_seeds.is_empty() {
187                for path in index.files.keys() {
188                    if path.ends_with(&format!(".{ext}")) {
189                        extra_keywords.push(
190                            std::path::Path::new(path)
191                                .file_stem()
192                                .and_then(|s| s.to_str())
193                                .unwrap_or("")
194                                .to_string(),
195                        );
196                        break;
197                    }
198                }
199            }
200        }
201    }
202
203    let mut result = compute_relevance(index, &file_seeds, &extra_keywords);
204
205    match intent.scope {
206        IntentScope::SingleFile => {
207            result.truncate(5);
208        }
209        IntentScope::MultiFile => {
210            result.truncate(15);
211        }
212        IntentScope::CrossModule | IntentScope::ProjectWide => {}
213    }
214
215    result
216}
217
218fn resolve_target_to_files(index: &ProjectIndex, target: &str) -> Vec<String> {
219    let mut matches = Vec::new();
220    for path in index.files.keys() {
221        if path.ends_with(target) || path.contains(target) {
222            matches.push(path.clone());
223        }
224    }
225    matches
226}
227
228fn resolve_symbol_to_files(index: &ProjectIndex, symbol: &str) -> Vec<String> {
229    let sym_lower = symbol.to_lowercase();
230    let mut matches = Vec::new();
231    for entry in index.symbols.values() {
232        let name_lower = entry.name.to_lowercase();
233        if (name_lower == sym_lower || name_lower.contains(&sym_lower))
234            && !matches.contains(&entry.file)
235        {
236            matches.push(entry.file.clone());
237        }
238    }
239    if matches.is_empty() {
240        for (path, file_entry) in &index.files {
241            if file_entry
242                .exports
243                .iter()
244                .any(|e| e.to_lowercase().contains(&sym_lower))
245                && !matches.contains(path)
246            {
247                matches.push(path.clone());
248            }
249        }
250    }
251    matches
252}
253
/// Map a relevance score to a context-inclusion mode.
///
/// Thresholds: >= 0.8 → "full", >= 0.5 → "signatures",
/// >= 0.2 → "map", otherwise "reference".
fn recommend_mode(score: f64) -> &'static str {
    match score {
        s if s >= 0.8 => "full",
        s if s >= 0.5 => "signatures",
        s if s >= 0.2 => "map",
        _ => "reference",
    }
}
265
266/// Build adjacency with module-path → file-path resolution.
267/// Graph edges store file paths as `from` and Rust module paths as `to`
268/// (e.g. `crate::core::tokens::count_tokens`). We resolve `to` back to file
269/// paths so heat diffusion and PageRank can propagate across the graph.
270fn build_adjacency_resolved(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
271    let module_to_file = build_module_map(index);
272    let mut adj: HashMap<String, Vec<String>> = HashMap::new();
273
274    for edge in &index.edges {
275        let from = &edge.from;
276        let to_resolved = module_to_file
277            .get(&edge.to)
278            .cloned()
279            .unwrap_or_else(|| edge.to.clone());
280
281        if index.files.contains_key(from) && index.files.contains_key(&to_resolved) {
282            adj.entry(from.clone())
283                .or_default()
284                .push(to_resolved.clone());
285            adj.entry(to_resolved).or_default().push(from.clone());
286        }
287    }
288    adj
289}
290
291/// Map module/import paths to file paths using heuristics.
292/// e.g. `crate::core::tokens::count_tokens` → `rust/src/core/tokens.rs`
293fn build_module_map(index: &ProjectIndex) -> HashMap<String, String> {
294    let file_paths: Vec<&str> = index.files.keys().map(|s| s.as_str()).collect();
295    let mut mapping: HashMap<String, String> = HashMap::new();
296
297    let edge_targets: HashSet<String> = index.edges.iter().map(|e| e.to.clone()).collect();
298
299    for target in &edge_targets {
300        if index.files.contains_key(target) {
301            mapping.insert(target.clone(), target.clone());
302            continue;
303        }
304
305        if let Some(resolved) = resolve_module_to_file(target, &file_paths) {
306            mapping.insert(target.clone(), resolved);
307        }
308    }
309
310    mapping
311}
312
/// Resolve a module path (e.g. `crate::core::tokens::count_tokens`) to a file
/// path by trying progressively shorter `::` prefixes as `<prefix>.rs` or
/// `<prefix>/mod.rs`, then falling back to matching the last segment's stem.
///
/// Returns the first matching entry of `file_paths`, or `None`.
fn resolve_module_to_file(module_path: &str, file_paths: &[&str]) -> Option<String> {
    let cleaned = module_path
        .trim_start_matches("crate::")
        .trim_start_matches("super::");

    // Strip trailing symbol (e.g. `core::tokens::count_tokens` → `core::tokens`)
    let parts: Vec<&str> = cleaned.split("::").collect();

    // Try progressively shorter prefixes to find a matching file
    for end in (1..=parts.len()).rev() {
        let candidate = parts[..end].join("/");

        // Hoisted out of the per-file loop: these strings are invariant for a
        // given candidate, so format them once instead of once per file.
        let file_form = format!("{candidate}.rs");
        let mod_form = format!("{candidate}/mod.rs");
        let file_suffix = format!("/{candidate}.rs");
        let mod_suffix = format!("/{candidate}/mod.rs");

        for fp in file_paths {
            let fp_normalized = fp
                .trim_start_matches("rust/src/")
                .trim_start_matches("src/");

            if fp_normalized == file_form
                || fp_normalized == mod_form
                || fp.ends_with(&file_suffix)
                || fp.ends_with(&mod_suffix)
            {
                return Some(fp.to_string());
            }
        }
    }

    // Fallback: match by last segment as filename stem
    if let Some(last) = parts.last() {
        let stem = format!("{last}.rs");
        for fp in file_paths {
            if fp.ends_with(&stem) {
                return Some(fp.to_string());
            }
        }
    }

    None
}
353
/// Extract likely task-relevant file paths and keywords from a task description.
///
/// A whitespace token counts as a file when it contains a dot and either a
/// slash or a known source extension; otherwise tokens of length >= 3 that
/// are not stop words become keywords.
pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
    let mut files = Vec::new();
    let mut keywords = Vec::new();

    // Characters allowed to survive edge-trimming of a token.
    let keep = |c: char| c.is_alphanumeric() || matches!(c, '.' | '/' | '_' | '-');

    for raw in task_description.split_whitespace() {
        let token = raw.trim_matches(|c: char| !keep(c));
        let has_code_ext = [".rs", ".ts", ".py", ".go", ".js"]
            .iter()
            .any(|ext| token.ends_with(ext));

        if token.contains('.') && (token.contains('/') || has_code_ext) {
            files.push(token.to_string());
        } else if token.len() >= 3 && !STOP_WORDS.contains(&token.to_lowercase().as_str()) {
            keywords.push(token.to_string());
        }
    }

    (files, keywords)
}

/// English and German filler words that make poor retrieval keywords.
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
388
/// Per-category line weights used by the information-bottleneck filter's
/// structural scoring (see `information_bottleneck_filter_typed`).
struct StructuralWeights {
    error_handling: f64, // lines matched by `is_error_handling` (Err/panic/try-catch)
    definition: f64,     // lines matched by `is_definition_line` (fn/struct/class …)
    control_flow: f64,   // lines matched by `is_control_flow` (if/match/loop …)
    closing_brace: f64,  // bare closing-delimiter lines
    other: f64,          // fallback weight for everything else
}
396
397impl StructuralWeights {
398    const DEFAULT: Self = Self {
399        error_handling: 1.5,
400        definition: 1.0,
401        control_flow: 0.5,
402        closing_brace: 0.15,
403        other: 0.3,
404    };
405
406    fn for_task_type(task_type: Option<super::intent_engine::TaskType>) -> Self {
407        use super::intent_engine::TaskType;
408        match task_type {
409            Some(TaskType::FixBug) => Self {
410                error_handling: 2.0,
411                definition: 0.8,
412                control_flow: 0.8,
413                closing_brace: 0.1,
414                other: 0.2,
415            },
416            Some(TaskType::Debug) => Self {
417                error_handling: 2.0,
418                definition: 0.6,
419                control_flow: 1.0,
420                closing_brace: 0.1,
421                other: 0.2,
422            },
423            Some(TaskType::Generate) => Self {
424                error_handling: 0.8,
425                definition: 1.5,
426                control_flow: 0.3,
427                closing_brace: 0.15,
428                other: 0.4,
429            },
430            Some(TaskType::Refactor) => Self {
431                error_handling: 1.0,
432                definition: 1.5,
433                control_flow: 0.6,
434                closing_brace: 0.2,
435                other: 0.3,
436            },
437            Some(TaskType::Test) => Self {
438                error_handling: 1.2,
439                definition: 1.3,
440                control_flow: 0.4,
441                closing_brace: 0.15,
442                other: 0.3,
443            },
444            Some(TaskType::Review) => Self {
445                error_handling: 1.3,
446                definition: 1.2,
447                control_flow: 0.6,
448                closing_brace: 0.15,
449                other: 0.3,
450            },
451            Some(TaskType::Explore) | None => Self::DEFAULT,
452            Some(_) => Self::DEFAULT,
453        }
454    }
455}
456
/// Information Bottleneck filter v3 — Mutual Information scoring, QUITO-X inspired.
///
/// IB principle: maximize I(T;Y) (task relevance) while minimizing I(T;X) (input redundancy).
/// v3: MI(line, task) approximated via token overlap + IDF weighting + structural importance.
///
/// Key changes from v2:
///   - Mutual Information scoring: MI(line, task) = H(line) - H(line|task)
///   - Adaptive budget allocation based on task type via TaskClassifier
///   - Token-level IDF computed over full document for better term weighting
///   - Maintains L-curve attention, MMR dedup, error-handling priority from v2
///
/// Convenience wrapper over `information_bottleneck_filter_typed` that uses
/// no task type (i.e. `StructuralWeights::DEFAULT`).
pub fn information_bottleneck_filter(
    content: &str,
    task_keywords: &[String],
    budget_ratio: f64,
) -> String {
    information_bottleneck_filter_typed(content, task_keywords, budget_ratio, None)
}
474
/// Task-type-aware IB filter. Uses `TaskType` to adjust structural weights.
///
/// Per line, three factors are computed and multiplied (with small floors):
///   - relevance: IDF-weighted keyword-overlap "MI" score + verbatim keyword
///     hits + a structural-class weight (error handling / definitions / …)
///   - information: token diversity + mean IDF of the line's tokens
///   - attention: positional weight from `LearnedAttention`
/// The top `budget` lines are then de-duplicated with MMR and emitted in
/// selection (score-greedy) order — NOT original source order — optionally
/// prefixed by a `[task: …]` header when keywords were given.
pub fn information_bottleneck_filter_typed(
    content: &str,
    task_keywords: &[String],
    budget_ratio: f64,
    task_type: Option<super::intent_engine::TaskType>,
) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.is_empty() {
        return String::new();
    }

    let n = lines.len();
    let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
    let attention = LearnedAttention::with_defaults();

    // Document-wide token frequencies, feeding both IDF terms below.
    let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
    for line in &lines {
        for token in line.split_whitespace() {
            *global_token_freq.entry(token).or_insert(0) += 1;
        }
    }
    let total_unique = global_token_freq.len().max(1) as f64;
    let total_lines = n.max(1) as f64;

    // Alphanumeric fragments (len >= 2) of the keywords, for overlap matching.
    let task_token_set: HashSet<String> = kw_lower
        .iter()
        .flat_map(|kw| kw.split(|c: char| !c.is_alphanumeric()).map(String::from))
        .filter(|t| t.len() >= 2)
        .collect();

    // With usable keywords, shrink the budget further for repetitive content.
    let effective_ratio = if !task_token_set.is_empty() {
        adaptive_ib_budget(content, budget_ratio)
    } else {
        budget_ratio
    };

    let weights = StructuralWeights::for_task_type(task_type);

    let mut scored_lines: Vec<(usize, &str, f64)> = lines
        .iter()
        .enumerate()
        .map(|(i, line)| {
            let trimmed = line.trim();
            if trimmed.is_empty() {
                // Blank lines get a tiny constant score so they rarely survive.
                return (i, *line, 0.05);
            }

            let line_lower = trimmed.to_lowercase();
            let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
            let line_token_count = line_tokens.len().max(1) as f64;

            // "MI" proxy: IDF-weighted overlap between this line's tokens and
            // the task tokens, normalized by line length.
            let mi_score = if task_token_set.is_empty() {
                0.0
            } else {
                let line_token_set: HashSet<String> =
                    line_tokens.iter().map(|t| t.to_lowercase()).collect();
                let overlap: f64 = line_token_set
                    .iter()
                    .filter(|t| task_token_set.iter().any(|kw| t.contains(kw.as_str())))
                    .map(|t| {
                        let freq = *global_token_freq.get(t.as_str()).unwrap_or(&1) as f64;
                        // Rarer tokens carry more information; floored at 0.1.
                        (total_lines / freq).ln().max(0.1)
                    })
                    .sum();
                overlap / line_token_count
            };

            // Count of whole keywords appearing verbatim in the line.
            let keyword_hits: f64 = kw_lower
                .iter()
                .filter(|kw| line_lower.contains(kw.as_str()))
                .count() as f64;

            // Structural class weight (task-type dependent); first match wins.
            let structural = if is_error_handling(trimmed) {
                weights.error_handling
            } else if is_definition_line(trimmed) {
                weights.definition
            } else if is_control_flow(trimmed) {
                weights.control_flow
            } else if is_closing_brace(trimmed) {
                weights.closing_brace
            } else {
                weights.other
            };
            let relevance = mi_score * 0.4 + keyword_hits * 0.3 + structural;

            // Intrinsic information content: unique-token ratio + mean IDF.
            let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
            let token_diversity = unique_in_line / line_token_count;

            let avg_idf: f64 = if line_tokens.is_empty() {
                0.0
            } else {
                line_tokens
                    .iter()
                    .map(|t| {
                        let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
                        (total_unique / freq).ln().max(0.0)
                    })
                    .sum::<f64>()
                    / line_token_count
            };
            let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);

            // Positional attention over the line's relative position in [0, 1).
            let pos = i as f64 / n.max(1) as f64;
            let attn_weight = attention.weight(pos);

            // Multiplicative blend; the +0.05 floors keep any single zero
            // factor from wiping the score out entirely.
            let score = (relevance * 0.6 + 0.05)
                * (information * 0.25 + 0.05)
                * (attn_weight * 0.15 + 0.05);

            (i, *line, score)
        })
        .collect();

    // Line budget derived from the (possibly adapted) ratio.
    let budget = ((n as f64) * effective_ratio).ceil() as usize;

    scored_lines.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));

    // Greedy MMR pass removes near-duplicate high scorers.
    let selected = mmr_select(&scored_lines, budget, 0.3);

    let mut output_lines: Vec<&str> = Vec::with_capacity(budget + 1);

    // Placeholder slot for the `[task: …]` header; skipped via `[1..]` below.
    if !kw_lower.is_empty() {
        output_lines.push("");
    }

    for (_, line, _) in &selected {
        output_lines.push(line);
    }

    if !kw_lower.is_empty() {
        let summary = format!("[task: {}]", task_keywords.join(", "));
        let mut result = summary;
        result.push('\n');
        result.push_str(&output_lines[1..].to_vec().join("\n"));
        return result;
    }

    output_lines.join("\n")
}
615
/// Maximum Marginal Relevance selection — greedily picks lines, trading raw
/// score against redundancy with lines already chosen.
///
/// MMR(i) = relevance(i) - lambda * max_{j in S} jaccard(tokens_i, tokens_j)
///
/// `candidates` must be sorted by descending score; the top candidate is
/// always taken first. Ties keep the earlier candidate. Returns at most
/// `budget` entries in selection (score-greedy) order.
fn mmr_select<'a>(
    candidates: &[(usize, &'a str, f64)],
    budget: usize,
    lambda: f64,
) -> Vec<(usize, &'a str, f64)> {
    if budget == 0 || candidates.is_empty() {
        return Vec::new();
    }

    // Token-set Jaccard similarity; empty-vs-anything counts as 0.
    let jaccard = |a: &HashSet<&str>, b: &HashSet<&str>| -> f64 {
        let union = a.union(b).count();
        if union == 0 {
            0.0
        } else {
            a.intersection(b).count() as f64 / union as f64
        }
    };

    let mut pool: Vec<(usize, &'a str, f64)> = candidates.to_vec();
    let mut picked: Vec<(usize, &'a str, f64)> = Vec::with_capacity(budget);

    // The highest-scored candidate is always selected first.
    picked.push(pool.remove(0));

    while picked.len() < budget && !pool.is_empty() {
        // Argmax of the MMR objective over the remaining pool; strict `>`
        // keeps the earliest candidate on ties.
        let (best, _) = pool
            .iter()
            .enumerate()
            .map(|(i, &(_, text, score))| {
                let tokens: HashSet<&str> = text.split_whitespace().collect();
                let redundancy = picked
                    .iter()
                    .map(|&(_, chosen, _)| {
                        let chosen_tokens: HashSet<&str> = chosen.split_whitespace().collect();
                        jaccard(&tokens, &chosen_tokens)
                    })
                    .fold(0.0_f64, f64::max);
                (i, score - lambda * redundancy)
            })
            .fold((0usize, f64::NEG_INFINITY), |best, cur| {
                if cur.1 > best.1 {
                    cur
                } else {
                    best
                }
            });
        picked.push(pool.remove(best));
    }

    picked
}
678
/// Heuristic: does this (pre-trimmed) line look like error handling?
/// Matches Rust, Python, and JS/TS error idioms by prefix or substring.
fn is_error_handling(line: &str) -> bool {
    const PREFIXES: [&str; 10] = [
        "return Err(",
        "Err(",
        "bail!(",
        "anyhow::bail!",
        "raise ",
        "throw ",
        "catch ",
        "except ",
        "try ",
        "panic!(",
    ];
    const SUBSTRINGS: [&str; 5] = [".map_err(", "unwrap()", "expect(\"", "Error::", "error!"];

    PREFIXES.iter().any(|p| line.starts_with(p))
        || SUBSTRINGS.iter().any(|s| line.contains(s))
        // `?;` counts only outside line comments.
        || (line.contains("?;") && !line.starts_with("//"))
}
697
/// Compute an adaptive IB budget ratio based on content characteristics.
/// Highly repetitive content → more aggressive filtering (lower ratio).
/// High-entropy diverse content → more conservative (higher ratio).
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    // Very short inputs are never filtered.
    if content.lines().count() < 10 {
        return 1.0;
    }

    // Newlines are whitespace, so tokenizing the whole content matches the
    // per-line tokenization exactly.
    let tokens: Vec<&str> = content.split_whitespace().collect();
    if tokens.is_empty() {
        return base_ratio;
    }

    // Fraction of distinct tokens; the complement measures repetition.
    let distinct: HashSet<&str> = tokens.iter().copied().collect();
    let unique_ratio = distinct.len() as f64 / tokens.len() as f64;
    let repetition_factor = 1.0 - unique_ratio;

    // At most a 30% budget cut for fully repetitive content, floored at 0.2.
    (base_ratio * (1.0 - repetition_factor * 0.3)).clamp(0.2, 1.0)
}
725
/// True when `line` begins (after leading whitespace) with a definition
/// keyword from Rust, TypeScript/JavaScript, Python, or Go.
fn is_definition_line(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];
    // Trim once instead of once per prefix. The separate untrimmed
    // `starts_with` the original also ran was redundant: trimming is a no-op
    // on a line that already starts with one of these prefixes.
    let trimmed = line.trim_start();
    PREFIXES.iter().any(|p| trimmed.starts_with(p))
}
760
/// True when `line` (trimmed) starts with a control-flow keyword followed by
/// a word boundary.
///
/// The boundary check fixes false positives from the old prefix test, where
/// `break`/`continue`/`yield` matched identifiers such as `breakpoint(...)`.
/// It also recognizes bare `return;`/`continue` and brace-adjacent forms
/// like `else {` or `if(` that the space-suffixed prefixes missed.
fn is_control_flow(line: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "if", "else", "match", "for", "while", "return", "break", "continue", "yield", "await",
    ];
    let trimmed = line.trim();
    KEYWORDS.iter().any(|kw| match trimmed.strip_prefix(kw) {
        // The keyword must end at a word boundary: end-of-line or a
        // non-identifier character.
        Some(rest) => rest
            .chars()
            .next()
            .map_or(true, |c| !(c.is_alphanumeric() || c == '_')),
        None => false,
    })
}
774
/// A line consisting solely of a closing delimiter, optionally with `)` / `;`.
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
779
#[cfg(test)]
mod tests {
    use super::*;

    // parse_task_hints splits a prose task into path-like tokens (files) and
    // non-stop-word keywords.
    #[test]
    fn parse_task_finds_files_and_keywords() {
        let (files, keywords) =
            parse_task_hints("Fix the authentication bug in src/auth.rs and update tests");
        assert!(files.iter().any(|f| f.contains("auth.rs")));
        assert!(keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication")));
    }

    // One representative score per threshold band of recommend_mode.
    #[test]
    fn recommend_mode_by_score() {
        assert_eq!(recommend_mode(1.0), "full");
        assert_eq!(recommend_mode(0.6), "signatures");
        assert_eq!(recommend_mode(0.3), "map");
        assert_eq!(recommend_mode(0.1), "reference");
    }

    // Definition lines outrank filler, so they survive a 0.6 budget, and a
    // non-empty keyword list produces the "[task: …]" header.
    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n    let x = 42;\n    // boring comment\n    println!(x);\n}\n";
        let result = information_bottleneck_filter(content, &["main".to_string()], 0.6);
        assert!(result.contains("fn main"), "definitions must be preserved");
        assert!(result.contains("[task: main]"), "should have task summary");
    }

    // Error-handling lines carry the highest structural weight, so they
    // survive even an aggressive 0.5 budget.
    #[test]
    fn info_bottleneck_error_handling_priority() {
        let content = "fn validate() {\n    let data = parse()?;\n    return Err(\"invalid\");\n    let x = 1;\n    let y = 2;\n}\n";
        let result = information_bottleneck_filter(content, &["validate".to_string()], 0.5);
        assert!(
            result.contains("return Err"),
            "error handling should survive filtering"
        );
    }

    // Output is emitted in score order, so a definition line must precede a
    // bare closing brace when both are selected.
    #[test]
    fn info_bottleneck_score_sorted() {
        let content = "fn important() {\n    let x = 1;\n    let y = 2;\n    let z = 3;\n}\n}\n";
        let result = information_bottleneck_filter(content, &[], 0.6);
        let lines: Vec<&str> = result.lines().collect();
        let def_pos = lines.iter().position(|l| l.contains("fn important"));
        let brace_pos = lines.iter().position(|l| l.trim() == "}");
        if let (Some(d), Some(b)) = (def_pos, brace_pos) {
            assert!(
                d < b,
                "definitions should appear before closing braces in score-sorted output"
            );
        }
    }

    // Token repetition lowers the adaptive budget relative to diverse input.
    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect::<Vec<_>>()
            .join("\n");
        let budget_rep = super::adaptive_ib_budget(&repetitive, 0.7);
        let budget_div = super::adaptive_ib_budget(&diverse, 0.7);
        assert!(
            budget_rep < budget_div,
            "repetitive content should get lower budget"
        );
    }
}