// lean_ctx/core/task_relevance.rs

1use std::collections::{HashMap, HashSet};
2
3use super::graph_index::ProjectIndex;
4
5use super::neural::attention_learned::LearnedAttention;
6
/// Relevance of one indexed file to the current task.
#[derive(Clone, Debug)]
pub struct RelevanceScore {
    /// File path, as keyed in the project index.
    pub path: String,
    /// Combined relevance score; higher means more relevant.
    pub score: f64,
    /// Read mode suggested for this score (see `recommend_mode`).
    pub recommended_mode: &'static str,
}
13
14pub fn compute_relevance(
15    index: &ProjectIndex,
16    task_files: &[String],
17    task_keywords: &[String],
18) -> Vec<RelevanceScore> {
19    let adj = build_adjacency_resolved(index);
20    let all_nodes: Vec<String> = index.files.keys().cloned().collect();
21    if all_nodes.is_empty() {
22        return Vec::new();
23    }
24
25    let node_idx: HashMap<&str, usize> = all_nodes
26        .iter()
27        .enumerate()
28        .map(|(i, n)| (n.as_str(), i))
29        .collect();
30    let n = all_nodes.len();
31
32    // Build degree-normalized adjacency for heat diffusion
33    let degrees: Vec<f64> = all_nodes
34        .iter()
35        .map(|node| {
36            adj.get(node)
37                .map_or(0.0, |neigh| neigh.len() as f64)
38                .max(1.0)
39        })
40        .collect();
41
42    // Seed vector: task files get 1.0
43    let mut heat: Vec<f64> = vec![0.0; n];
44    for f in task_files {
45        if let Some(&idx) = node_idx.get(f.as_str()) {
46            heat[idx] = 1.0;
47        }
48    }
49
50    // Heat diffusion: h(t+1) = (1-alpha)*h(t) + alpha * A_norm * h(t)
51    // Run for k iterations
52    let alpha = 0.5;
53    let iterations = 4;
54    for _ in 0..iterations {
55        let mut new_heat = vec![0.0; n];
56        for (i, node) in all_nodes.iter().enumerate() {
57            let self_term = (1.0 - alpha) * heat[i];
58            let mut neighbor_sum = 0.0;
59            if let Some(neighbors) = adj.get(node) {
60                for neighbor in neighbors {
61                    if let Some(&j) = node_idx.get(neighbor.as_str()) {
62                        neighbor_sum += heat[j] / degrees[j];
63                    }
64                }
65            }
66            new_heat[i] = self_term + alpha * neighbor_sum;
67        }
68        heat = new_heat;
69    }
70
71    // PageRank centrality for gateway detection
72    let mut pagerank = vec![1.0 / n as f64; n];
73    let damping = 0.85;
74    for _ in 0..8 {
75        let mut new_pr = vec![(1.0 - damping) / n as f64; n];
76        for (i, node) in all_nodes.iter().enumerate() {
77            if let Some(neighbors) = adj.get(node) {
78                let out_deg = neighbors.len().max(1) as f64;
79                for neighbor in neighbors {
80                    if let Some(&j) = node_idx.get(neighbor.as_str()) {
81                        new_pr[j] += damping * pagerank[i] / out_deg;
82                    }
83                }
84            }
85        }
86        pagerank = new_pr;
87    }
88
89    // Combine: heat (primary) + pagerank centrality (gateway bonus)
90    let mut scores: HashMap<String, f64> = HashMap::new();
91    let heat_max = heat.iter().copied().fold(0.0_f64, f64::max).max(1e-10);
92    let pr_max = pagerank.iter().copied().fold(0.0_f64, f64::max).max(1e-10);
93
94    for (i, node) in all_nodes.iter().enumerate() {
95        let h = heat[i] / heat_max;
96        let pr = pagerank[i] / pr_max;
97        let combined = h * 0.8 + pr * 0.2;
98        if combined > 0.01 {
99            scores.insert(node.clone(), combined);
100        }
101    }
102
103    // Keyword boost
104    if !task_keywords.is_empty() {
105        let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
106        for (file_path, file_entry) in &index.files {
107            let path_lower = file_path.to_lowercase();
108            let mut keyword_hits = 0;
109            for kw in &kw_lower {
110                if path_lower.contains(kw) {
111                    keyword_hits += 1;
112                }
113                for export in &file_entry.exports {
114                    if export.to_lowercase().contains(kw) {
115                        keyword_hits += 1;
116                    }
117                }
118            }
119            if keyword_hits > 0 {
120                let boost = (keyword_hits as f64 * 0.15).min(0.6);
121                let entry = scores.entry(file_path.clone()).or_insert(0.0);
122                *entry = (*entry + boost).min(1.0);
123            }
124        }
125    }
126
127    let mut result: Vec<RelevanceScore> = scores
128        .into_iter()
129        .map(|(path, score)| {
130            let mode = recommend_mode(score);
131            RelevanceScore {
132                path,
133                score,
134                recommended_mode: mode,
135            }
136        })
137        .collect();
138
139    result.sort_by(|a, b| {
140        b.score
141            .partial_cmp(&a.score)
142            .unwrap_or(std::cmp::Ordering::Equal)
143    });
144    result
145}
146
147pub fn compute_relevance_from_intent(
148    index: &ProjectIndex,
149    intent: &super::intent_engine::StructuredIntent,
150) -> Vec<RelevanceScore> {
151    use super::intent_engine::IntentScope;
152
153    let mut file_seeds: Vec<String> = Vec::new();
154    let mut extra_keywords: Vec<String> = intent.keywords.clone();
155
156    for target in &intent.targets {
157        if target.contains('.') || target.contains('/') {
158            let matched = resolve_target_to_files(index, target);
159            if matched.is_empty() {
160                extra_keywords.push(target.clone());
161            } else {
162                file_seeds.extend(matched);
163            }
164        } else {
165            let from_symbol = resolve_symbol_to_files(index, target);
166            if from_symbol.is_empty() {
167                extra_keywords.push(target.clone());
168            } else {
169                file_seeds.extend(from_symbol);
170            }
171        }
172    }
173
174    if let Some(lang) = &intent.language_hint {
175        let lang_ext = match lang.as_str() {
176            "rust" => Some("rs"),
177            "typescript" => Some("ts"),
178            "javascript" => Some("js"),
179            "python" => Some("py"),
180            "go" => Some("go"),
181            "ruby" => Some("rb"),
182            "java" => Some("java"),
183            _ => None,
184        };
185        if let Some(ext) = lang_ext {
186            if file_seeds.is_empty() {
187                for path in index.files.keys() {
188                    if path.ends_with(&format!(".{ext}")) {
189                        extra_keywords.push(
190                            std::path::Path::new(path)
191                                .file_stem()
192                                .and_then(|s| s.to_str())
193                                .unwrap_or("")
194                                .to_string(),
195                        );
196                        break;
197                    }
198                }
199            }
200        }
201    }
202
203    let mut result = compute_relevance(index, &file_seeds, &extra_keywords);
204
205    match intent.scope {
206        IntentScope::SingleFile => {
207            result.truncate(5);
208        }
209        IntentScope::MultiFile => {
210            result.truncate(15);
211        }
212        IntentScope::CrossModule | IntentScope::ProjectWide => {}
213    }
214
215    result
216}
217
218fn resolve_target_to_files(index: &ProjectIndex, target: &str) -> Vec<String> {
219    let mut matches = Vec::new();
220    for path in index.files.keys() {
221        if path.ends_with(target) || path.contains(target) {
222            matches.push(path.clone());
223        }
224    }
225    matches
226}
227
228fn resolve_symbol_to_files(index: &ProjectIndex, symbol: &str) -> Vec<String> {
229    let sym_lower = symbol.to_lowercase();
230    let mut matches = Vec::new();
231    for entry in index.symbols.values() {
232        let name_lower = entry.name.to_lowercase();
233        if (name_lower == sym_lower || name_lower.contains(&sym_lower))
234            && !matches.contains(&entry.file)
235        {
236            matches.push(entry.file.clone());
237        }
238    }
239    if matches.is_empty() {
240        for (path, file_entry) in &index.files {
241            if file_entry
242                .exports
243                .iter()
244                .any(|e| e.to_lowercase().contains(&sym_lower))
245                && !matches.contains(path)
246            {
247                matches.push(path.clone());
248            }
249        }
250    }
251    matches
252}
253
/// Map a relevance score to a read mode.
///
/// Tiers are checked from highest to lowest; the first threshold the score
/// reaches wins. Anything below the lowest tier (including NaN) yields
/// `"reference"`.
fn recommend_mode(score: f64) -> &'static str {
    const TIERS: [(f64, &str); 3] = [(0.8, "full"), (0.5, "signatures"), (0.2, "map")];

    TIERS
        .iter()
        .find(|(floor, _)| score >= *floor)
        .map_or("reference", |(_, mode)| *mode)
}
265
266/// Build adjacency with module-path → file-path resolution.
267/// Graph edges store file paths as `from` and Rust module paths as `to`
268/// (e.g. `crate::core::tokens::count_tokens`). We resolve `to` back to file
269/// paths so heat diffusion and PageRank can propagate across the graph.
270fn build_adjacency_resolved(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
271    let module_to_file = build_module_map(index);
272    let mut adj: HashMap<String, Vec<String>> = HashMap::new();
273
274    for edge in &index.edges {
275        let from = &edge.from;
276        let to_resolved = module_to_file
277            .get(&edge.to)
278            .cloned()
279            .unwrap_or_else(|| edge.to.clone());
280
281        if index.files.contains_key(from) && index.files.contains_key(&to_resolved) {
282            adj.entry(from.clone())
283                .or_default()
284                .push(to_resolved.clone());
285            adj.entry(to_resolved).or_default().push(from.clone());
286        }
287    }
288    adj
289}
290
291/// Map module/import paths to file paths using heuristics.
292/// e.g. `crate::core::tokens::count_tokens` → `rust/src/core/tokens.rs`
293fn build_module_map(index: &ProjectIndex) -> HashMap<String, String> {
294    let file_paths: Vec<&str> = index
295        .files
296        .keys()
297        .map(std::string::String::as_str)
298        .collect();
299    let mut mapping: HashMap<String, String> = HashMap::new();
300
301    let edge_targets: HashSet<String> = index.edges.iter().map(|e| e.to.clone()).collect();
302
303    for target in &edge_targets {
304        if index.files.contains_key(target) {
305            mapping.insert(target.clone(), target.clone());
306            continue;
307        }
308
309        if let Some(resolved) = resolve_module_to_file(target, &file_paths) {
310            mapping.insert(target.clone(), resolved);
311        }
312    }
313
314    mapping
315}
316
/// Resolve a Rust module path to one of `file_paths` using heuristics.
///
/// Leading `crate::` / `super::` prefixes are stripped and empty segments
/// are ignored. Progressively shorter prefixes of the remaining path are
/// then tried (longest first), each as `<prefix>.rs` and `<prefix>/mod.rs`,
/// either relative to a `rust/src/` or `src/` root or as a path suffix that
/// starts at a directory boundary. If no prefix matches, the last segment is
/// tried as a bare file name (`<last>.rs`), again only at a directory
/// boundary so `tokens` never matches `my_tokens.rs`.
///
/// Returns the first matching entry of `file_paths`, or `None`.
fn resolve_module_to_file(module_path: &str, file_paths: &[&str]) -> Option<String> {
    let cleaned = module_path
        .trim_start_matches("crate::")
        .trim_start_matches("super::");

    // Empty segments (empty input, stray `::`) would produce candidates such
    // as ".rs" that match unrelated files.
    let parts: Vec<&str> = cleaned.split("::").filter(|s| !s.is_empty()).collect();

    // The trailing segments are often symbols rather than modules, so try
    // progressively shorter prefixes.
    for end in (1..=parts.len()).rev() {
        let candidate = parts[..end].join("/");
        // Build the comparison strings once per candidate, not once per file.
        let as_file = format!("{candidate}.rs");
        let as_mod = format!("{candidate}/mod.rs");
        let nested_file = format!("/{as_file}");
        let nested_mod = format!("/{as_mod}");

        let hit = file_paths.iter().find(|fp| {
            let normalized = fp
                .trim_start_matches("rust/src/")
                .trim_start_matches("src/");
            normalized == as_file
                || normalized == as_mod
                || fp.ends_with(&nested_file)
                || fp.ends_with(&nested_mod)
        });
        if let Some(fp) = hit {
            return Some((*fp).to_owned());
        }
    }

    // Fallback: match by last segment as a complete file name.
    let last = parts.last()?;
    let file_name = format!("{last}.rs");
    let nested = format!("/{file_name}");
    file_paths
        .iter()
        .find(|fp| **fp == file_name || fp.ends_with(&nested))
        .map(|fp| (*fp).to_owned())
}
357
/// Extract likely task-relevant file paths and keywords from a task description.
///
/// The description is split on whitespace. Each word is stripped of
/// surrounding punctuation (keeping `.`, `/`, `_` and `-` inside the word)
/// and of trailing sentence periods, so `src/auth.rs.` is read as
/// `src/auth.rs` and `tests.` as `tests`.
///
/// A word is treated as a file when it contains a `.` and either contains a
/// `/` or ends in a known source extension (`rs`, `ts`, `py`, `go`, `js`,
/// case-insensitive). Any other word of at least three bytes that is not a
/// stop word becomes a keyword (original casing preserved).
///
/// Returns `(files, keywords)` in order of appearance.
pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
    const SOURCE_EXTENSIONS: &[&str] = &["rs", "ts", "py", "go", "js"];

    let is_token_char = |c: char| c.is_alphanumeric() || matches!(c, '.' | '/' | '_' | '-');

    let mut files = Vec::new();
    let mut keywords = Vec::new();

    for word in task_description.split_whitespace() {
        // A leading '.' may be meaningful ("./src", ".gitignore"); a trailing
        // one is sentence punctuation and is dropped.
        let clean = word
            .trim_start_matches(|c: char| !is_token_char(c))
            .trim_end_matches(|c: char| c == '.' || !is_token_char(c));

        let looks_like_file = clean.contains('.')
            && (clean.contains('/')
                || std::path::Path::new(clean).extension().is_some_and(|ext| {
                    SOURCE_EXTENSIONS
                        .iter()
                        .any(|known| ext.eq_ignore_ascii_case(*known))
                }));

        if looks_like_file {
            files.push(clean.to_string());
        } else if clean.len() >= 3 && !STOP_WORDS.contains(&clean.to_lowercase().as_str()) {
            keywords.push(clean.to_string());
        }
    }

    (files, keywords)
}

/// Lowercase English and German filler words that never become keywords.
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
395
396struct StructuralWeights {
397    error_handling: f64,
398    definition: f64,
399    control_flow: f64,
400    closing_brace: f64,
401    other: f64,
402}
403
404impl StructuralWeights {
405    const DEFAULT: Self = Self {
406        error_handling: 1.5,
407        definition: 1.0,
408        control_flow: 0.5,
409        closing_brace: 0.15,
410        other: 0.3,
411    };
412
413    fn for_task_type(task_type: Option<super::intent_engine::TaskType>) -> Self {
414        use super::intent_engine::TaskType;
415        match task_type {
416            Some(TaskType::FixBug) => Self {
417                error_handling: 2.0,
418                definition: 0.8,
419                control_flow: 0.8,
420                closing_brace: 0.1,
421                other: 0.2,
422            },
423            Some(TaskType::Debug) => Self {
424                error_handling: 2.0,
425                definition: 0.6,
426                control_flow: 1.0,
427                closing_brace: 0.1,
428                other: 0.2,
429            },
430            Some(TaskType::Generate) => Self {
431                error_handling: 0.8,
432                definition: 1.5,
433                control_flow: 0.3,
434                closing_brace: 0.15,
435                other: 0.4,
436            },
437            Some(TaskType::Refactor) => Self {
438                error_handling: 1.0,
439                definition: 1.5,
440                control_flow: 0.6,
441                closing_brace: 0.2,
442                other: 0.3,
443            },
444            Some(TaskType::Test) => Self {
445                error_handling: 1.2,
446                definition: 1.3,
447                control_flow: 0.4,
448                closing_brace: 0.15,
449                other: 0.3,
450            },
451            Some(TaskType::Review) => Self {
452                error_handling: 1.3,
453                definition: 1.2,
454                control_flow: 0.6,
455                closing_brace: 0.15,
456                other: 0.3,
457            },
458            None | Some(TaskType::Explore | _) => Self::DEFAULT,
459        }
460    }
461}
462
463/// Information Bottleneck filter v3 — Mutual Information scoring, QUITO-X inspired.
464///
465/// IB principle: maximize I(T;Y) (task relevance) while minimizing I(T;X) (input redundancy).
466/// v3: MI(line, task) approximated via token overlap + IDF weighting + structural importance.
467///
468/// Key changes from v2:
469///   - Mutual Information scoring: MI(line, task) = H(line) - H(line|task)
470///   - Adaptive budget allocation based on task type via TaskClassifier
471///   - Token-level IDF computed over full document for better term weighting
472///   - Maintains L-curve attention, MMR dedup, error-handling priority from v2
473pub fn information_bottleneck_filter(
474    content: &str,
475    task_keywords: &[String],
476    budget_ratio: f64,
477) -> String {
478    information_bottleneck_filter_typed(content, task_keywords, budget_ratio, None)
479}
480
481/// Task-type-aware IB filter. Uses `TaskType` to adjust structural weights.
482pub fn information_bottleneck_filter_typed(
483    content: &str,
484    task_keywords: &[String],
485    budget_ratio: f64,
486    task_type: Option<super::intent_engine::TaskType>,
487) -> String {
488    let lines: Vec<&str> = content.lines().collect();
489    if lines.is_empty() {
490        return String::new();
491    }
492
493    let n = lines.len();
494    let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
495    let attention = LearnedAttention::with_defaults();
496
497    let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
498    for line in &lines {
499        for token in line.split_whitespace() {
500            *global_token_freq.entry(token).or_insert(0) += 1;
501        }
502    }
503    let total_unique = global_token_freq.len().max(1) as f64;
504    let total_lines = n.max(1) as f64;
505
506    let task_token_set: HashSet<String> = kw_lower
507        .iter()
508        .flat_map(|kw| kw.split(|c: char| !c.is_alphanumeric()).map(String::from))
509        .filter(|t| t.len() >= 2)
510        .collect();
511
512    let effective_ratio = if task_token_set.is_empty() {
513        budget_ratio
514    } else {
515        adaptive_ib_budget(content, budget_ratio)
516    };
517
518    let weights = StructuralWeights::for_task_type(task_type);
519
520    let mut scored_lines: Vec<(usize, &str, f64)> = lines
521        .iter()
522        .enumerate()
523        .map(|(i, line)| {
524            let trimmed = line.trim();
525            if trimmed.is_empty() {
526                return (i, *line, 0.05);
527            }
528
529            let line_lower = trimmed.to_lowercase();
530            let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
531            let line_token_count = line_tokens.len().max(1) as f64;
532
533            let mi_score = if task_token_set.is_empty() {
534                0.0
535            } else {
536                let line_token_set: HashSet<String> =
537                    line_tokens.iter().map(|t| t.to_lowercase()).collect();
538                let overlap: f64 = line_token_set
539                    .iter()
540                    .filter(|t| task_token_set.iter().any(|kw| t.contains(kw.as_str())))
541                    .map(|t| {
542                        let freq = *global_token_freq.get(t.as_str()).unwrap_or(&1) as f64;
543                        (total_lines / freq).ln().max(0.1)
544                    })
545                    .sum();
546                overlap / line_token_count
547            };
548
549            let keyword_hits: f64 = kw_lower
550                .iter()
551                .filter(|kw| line_lower.contains(kw.as_str()))
552                .count() as f64;
553
554            let structural = if is_error_handling(trimmed) {
555                weights.error_handling
556            } else if is_definition_line(trimmed) {
557                weights.definition
558            } else if is_control_flow(trimmed) {
559                weights.control_flow
560            } else if is_closing_brace(trimmed) {
561                weights.closing_brace
562            } else {
563                weights.other
564            };
565            let relevance = mi_score * 0.4 + keyword_hits * 0.3 + structural;
566
567            let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
568            let token_diversity = unique_in_line / line_token_count;
569
570            let avg_idf: f64 = if line_tokens.is_empty() {
571                0.0
572            } else {
573                line_tokens
574                    .iter()
575                    .map(|t| {
576                        let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
577                        (total_unique / freq).ln().max(0.0)
578                    })
579                    .sum::<f64>()
580                    / line_token_count
581            };
582            let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);
583
584            let pos = i as f64 / n.max(1) as f64;
585            let attn_weight = attention.weight(pos);
586
587            let score = (relevance * 0.6 + 0.05)
588                * (information * 0.25 + 0.05)
589                * (attn_weight * 0.15 + 0.05);
590
591            (i, *line, score)
592        })
593        .collect();
594
595    let budget = ((n as f64) * effective_ratio).ceil() as usize;
596
597    scored_lines.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
598
599    let selected = mmr_select(&scored_lines, budget, 0.3);
600
601    let mut output_lines: Vec<&str> = Vec::with_capacity(budget + 1);
602
603    if !kw_lower.is_empty() {
604        output_lines.push("");
605    }
606
607    for (_, line, _) in &selected {
608        output_lines.push(line);
609    }
610
611    if !kw_lower.is_empty() {
612        let summary = format!("[task: {}]", task_keywords.join(", "));
613        let mut result = summary;
614        result.push('\n');
615        result.push_str(&output_lines[1..].to_vec().join("\n"));
616        return result;
617    }
618
619    output_lines.join("\n")
620}
621
/// Maximum Marginal Relevance selection — greedy selection that penalizes
/// redundancy with already-selected lines using token-set Jaccard similarity.
///
/// MMR(i) = relevance(i) - lambda * max_{j in S} jaccard(i, j)
///
/// `candidates` are `(line index, line text, score)` tuples; the first
/// candidate is always taken first, so callers should pass them sorted by
/// descending score. Candidates without any tokens are never penalised.
/// On equal MMR the earlier candidate wins.
///
/// Returns at most `budget` candidates in selection order; empty when
/// `candidates` is empty or `budget` is 0.
fn mmr_select<'a>(
    candidates: &[(usize, &'a str, f64)],
    budget: usize,
    lambda: f64,
) -> Vec<(usize, &'a str, f64)> {
    /// A not-yet-selected candidate with its cached token set and the highest
    /// similarity to anything selected so far.
    struct Pending<'s> {
        entry: (usize, &'s str, f64),
        tokens: HashSet<&'s str>,
        max_sim: f64,
    }

    /// Jaccard similarity of two token sets; 0.0 when both are empty.
    fn jaccard<'t>(a: &HashSet<&'t str>, b: &HashSet<&'t str>) -> f64 {
        let inter = a.intersection(b).count();
        let union = a.len() + b.len() - inter;
        if union == 0 {
            0.0
        } else {
            inter as f64 / union as f64
        }
    }

    if candidates.is_empty() || budget == 0 {
        return Vec::new();
    }
    // Cap the budget so the allocation below cannot overflow for huge values.
    let budget = budget.min(candidates.len());

    // Tokenise every line once instead of once per comparison.
    let mut pending: Vec<Pending<'a>> = candidates
        .iter()
        .map(|&entry| Pending {
            entry,
            tokens: entry.1.split_whitespace().collect(),
            max_sim: 0.0,
        })
        .collect();

    let mut selected = Vec::with_capacity(budget);

    // Always take the top-scored line first.
    let mut latest = pending.remove(0);
    loop {
        selected.push(latest.entry);
        if selected.len() >= budget || pending.is_empty() {
            break;
        }

        let mut best_idx = 0;
        let mut best_mmr = f64::NEG_INFINITY;
        for (i, cand) in pending.iter_mut().enumerate() {
            let mmr = if cand.tokens.is_empty() {
                cand.entry.2
            } else {
                // Only the newest selection can raise the running maximum.
                cand.max_sim = cand.max_sim.max(jaccard(&cand.tokens, &latest.tokens));
                cand.entry.2 - lambda * cand.max_sim
            };
            if mmr > best_mmr {
                best_mmr = mmr;
                best_idx = i;
            }
        }

        latest = pending.remove(best_idx);
    }

    selected
}
684
/// Heuristically detect error-handling lines across several languages.
///
/// A line qualifies when it starts with one of a fixed set of
/// error-raising / error-catching constructs, contains one of a fixed set
/// of error-related fragments, or contains `?;` without being a `//`
/// comment. Prefix checks are exact, so callers should pass a trimmed line.
fn is_error_handling(line: &str) -> bool {
    const LEADING: &[&str] = &[
        "return Err(",
        "Err(",
        "bail!(",
        "anyhow::bail!",
        "raise ",
        "throw ",
        "catch ",
        "except ",
        "try ",
        "panic!(",
    ];
    const ANYWHERE: &[&str] = &[".map_err(", "unwrap()", "expect(\"", "Error::", "error!"];

    if LEADING.iter().any(|prefix| line.starts_with(*prefix)) {
        return true;
    }
    if ANYWHERE.iter().any(|fragment| line.contains(*fragment)) {
        return true;
    }
    line.contains("?;") && !line.starts_with("//")
}
703
/// Adapt the IB budget ratio to how repetitive `content` is.
///
/// - Fewer than 10 lines: always `1.0`.
/// - No tokens at all: `base_ratio` is returned unchanged.
/// - Otherwise `base_ratio` is scaled down by up to 30% in proportion to the
///   share of repeated tokens, and the result is clamped to `[0.2, 1.0]`.
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    if content.lines().count() < 10 {
        return 1.0;
    }

    // Line breaks are whitespace, so tokenising the whole text yields the
    // same tokens as tokenising line by line.
    let mut distinct: HashSet<&str> = HashSet::new();
    let mut total_tokens = 0usize;
    for token in content.split_whitespace() {
        distinct.insert(token);
        total_tokens += 1;
    }

    if total_tokens == 0 {
        return base_ratio;
    }

    let unique_ratio = distinct.len() as f64 / total_tokens as f64;
    let repetition_factor = 1.0 - unique_ratio;
    let scaled = base_ratio * (1.0 - repetition_factor * 0.3);

    scaled.clamp(0.2, 1.0)
}
731
/// Return `true` when the line, ignoring leading whitespace, starts with a
/// definition keyword from Rust, TypeScript/JavaScript, Python or Go.
fn is_definition_line(line: &str) -> bool {
    const DEFINITION_PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];

    // No prefix begins with whitespace, so testing the left-trimmed line
    // also covers lines that match without trimming.
    let unindented = line.trim_start();
    DEFINITION_PREFIXES
        .iter()
        .any(|prefix| unindented.starts_with(*prefix))
}
766
/// Return `true` when the trimmed line starts with a control-flow keyword.
///
/// Most keywords must be followed by a space; `break`, `continue` and
/// `yield` match as bare prefixes.
fn is_control_flow(line: &str) -> bool {
    const KEYWORD_PREFIXES: &[&str] = &[
        "if ",
        "else ",
        "match ",
        "for ",
        "while ",
        "return ",
        "break",
        "continue",
        "yield",
        "await ",
    ];

    let statement = line.trim();
    KEYWORD_PREFIXES
        .iter()
        .any(|keyword| statement.starts_with(*keyword))
}
780
/// Return `true` when the trimmed line consists solely of a closing brace,
/// optionally followed by `)` and/or `;`.
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
785
#[cfg(test)]
mod tests {
    use super::*;

    /// Paths and non-stop-word keywords are both extracted from a sentence.
    #[test]
    fn parse_task_finds_files_and_keywords() {
        let description = "Fix the authentication bug in src/auth.rs and update tests";
        let (files, keywords) = parse_task_hints(description);

        let found_file = files.iter().any(|f| f.contains("auth.rs"));
        let found_keyword = keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication"));

        assert!(found_file);
        assert!(found_keyword);
    }

    /// Each score band maps to its read mode.
    #[test]
    fn recommend_mode_by_score() {
        let expectations = [
            (1.0, "full"),
            (0.6, "signatures"),
            (0.3, "map"),
            (0.1, "reference"),
        ];
        for (score, mode) in expectations {
            assert_eq!(recommend_mode(score), mode);
        }
    }

    /// Definition lines survive filtering and the task summary is emitted.
    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n    let x = 42;\n    // boring comment\n    println!(x);\n}\n";
        let keywords = vec!["main".to_string()];

        let result = information_bottleneck_filter(content, &keywords, 0.6);

        assert!(result.contains("fn main"), "definitions must be preserved");
        assert!(result.contains("[task: main]"), "should have task summary");
    }

    /// Error-handling lines survive filtering at a 50% budget.
    #[test]
    fn info_bottleneck_error_handling_priority() {
        let content = "fn validate() {\n    let data = parse()?;\n    return Err(\"invalid\");\n    let x = 1;\n    let y = 2;\n}\n";
        let keywords = vec!["validate".to_string()];

        let result = information_bottleneck_filter(content, &keywords, 0.5);

        assert!(
            result.contains("return Err"),
            "error handling should survive filtering"
        );
    }

    /// When both a definition and a closing brace are kept, the definition
    /// comes first in the output.
    #[test]
    fn info_bottleneck_score_sorted() {
        let content = "fn important() {\n    let x = 1;\n    let y = 2;\n    let z = 3;\n}\n}\n";
        let result = information_bottleneck_filter(content, &[], 0.6);

        let mut def_pos = None;
        let mut brace_pos = None;
        for (idx, line) in result.lines().enumerate() {
            if def_pos.is_none() && line.contains("fn important") {
                def_pos = Some(idx);
            }
            if brace_pos.is_none() && line.trim() == "}" {
                brace_pos = Some(idx);
            }
        }

        if let (Some(d), Some(b)) = (def_pos, brace_pos) {
            assert!(
                d < b,
                "definitions should appear before closing braces in score-sorted output"
            );
        }
    }

    /// Repetitive content gets a smaller budget than diverse content.
    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse_lines: Vec<String> = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect();
        let diverse = diverse_lines.join("\n");

        let budget_rep = adaptive_ib_budget(&repetitive, 0.7);
        let budget_div = adaptive_ib_budget(&diverse, 0.7);

        assert!(
            budget_rep < budget_div,
            "repetitive content should get lower budget"
        );
    }
}
855}