//! task_relevance.rs — lean_ctx/core
//!
//! Task-relevance scoring: heat diffusion + PageRank over the project import
//! graph, keyword boosting, and an information-bottleneck line filter.
use std::collections::{HashMap, HashSet};

use super::graph_index::ProjectIndex;
use super::neural::attention_learned::LearnedAttention;
/// Per-file relevance result: a normalized score plus the context mode
/// recommended for that score (see `recommend_mode`).
#[derive(Debug, Clone)]
pub struct RelevanceScore {
    /// Project-relative file path.
    pub path: String,
    /// Combined relevance score (heat diffusion + PageRank + keyword boost),
    /// clamped to at most 1.0.
    pub score: f64,
    /// One of "full", "signatures", "map", "reference".
    pub recommended_mode: &'static str,
}
13
14pub fn compute_relevance(
15    index: &ProjectIndex,
16    task_files: &[String],
17    task_keywords: &[String],
18) -> Vec<RelevanceScore> {
19    let adj = build_adjacency_resolved(index);
20    let all_nodes: Vec<String> = index.files.keys().cloned().collect();
21    if all_nodes.is_empty() {
22        return Vec::new();
23    }
24
25    let node_idx: HashMap<&str, usize> = all_nodes
26        .iter()
27        .enumerate()
28        .map(|(i, n)| (n.as_str(), i))
29        .collect();
30    let n = all_nodes.len();
31
32    // Build degree-normalized adjacency for heat diffusion
33    let degrees: Vec<f64> = all_nodes
34        .iter()
35        .map(|node| {
36            adj.get(node)
37                .map_or(0.0, |neigh| neigh.len() as f64)
38                .max(1.0)
39        })
40        .collect();
41
42    // Seed vector: task files get 1.0
43    let mut heat: Vec<f64> = vec![0.0; n];
44    for f in task_files {
45        if let Some(&idx) = node_idx.get(f.as_str()) {
46            heat[idx] = 1.0;
47        }
48    }
49
50    // Heat diffusion: h(t+1) = (1-alpha)*h(t) + alpha * A_norm * h(t)
51    // Run for k iterations
52    let alpha = 0.5;
53    let iterations = 4;
54    for _ in 0..iterations {
55        let mut new_heat = vec![0.0; n];
56        for (i, node) in all_nodes.iter().enumerate() {
57            let self_term = (1.0 - alpha) * heat[i];
58            let mut neighbor_sum = 0.0;
59            if let Some(neighbors) = adj.get(node) {
60                for neighbor in neighbors {
61                    if let Some(&j) = node_idx.get(neighbor.as_str()) {
62                        neighbor_sum += heat[j] / degrees[j];
63                    }
64                }
65            }
66            new_heat[i] = self_term + alpha * neighbor_sum;
67        }
68        heat = new_heat;
69    }
70
71    // PageRank centrality for gateway detection
72    let mut pagerank = vec![1.0 / n as f64; n];
73    let damping = 0.85;
74    for _ in 0..8 {
75        let mut new_pr = vec![(1.0 - damping) / n as f64; n];
76        for (i, node) in all_nodes.iter().enumerate() {
77            if let Some(neighbors) = adj.get(node) {
78                let out_deg = neighbors.len().max(1) as f64;
79                for neighbor in neighbors {
80                    if let Some(&j) = node_idx.get(neighbor.as_str()) {
81                        new_pr[j] += damping * pagerank[i] / out_deg;
82                    }
83                }
84            }
85        }
86        pagerank = new_pr;
87    }
88
89    // Combine: heat (primary) + pagerank centrality (gateway bonus)
90    let mut scores: HashMap<String, f64> = HashMap::new();
91    let heat_max = heat.iter().cloned().fold(0.0_f64, f64::max).max(1e-10);
92    let pr_max = pagerank.iter().cloned().fold(0.0_f64, f64::max).max(1e-10);
93
94    for (i, node) in all_nodes.iter().enumerate() {
95        let h = heat[i] / heat_max;
96        let pr = pagerank[i] / pr_max;
97        let combined = h * 0.8 + pr * 0.2;
98        if combined > 0.01 {
99            scores.insert(node.clone(), combined);
100        }
101    }
102
103    // Keyword boost
104    if !task_keywords.is_empty() {
105        let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
106        for (file_path, file_entry) in &index.files {
107            let path_lower = file_path.to_lowercase();
108            let mut keyword_hits = 0;
109            for kw in &kw_lower {
110                if path_lower.contains(kw) {
111                    keyword_hits += 1;
112                }
113                for export in &file_entry.exports {
114                    if export.to_lowercase().contains(kw) {
115                        keyword_hits += 1;
116                    }
117                }
118            }
119            if keyword_hits > 0 {
120                let boost = (keyword_hits as f64 * 0.15).min(0.6);
121                let entry = scores.entry(file_path.clone()).or_insert(0.0);
122                *entry = (*entry + boost).min(1.0);
123            }
124        }
125    }
126
127    let mut result: Vec<RelevanceScore> = scores
128        .into_iter()
129        .map(|(path, score)| {
130            let mode = recommend_mode(score);
131            RelevanceScore {
132                path,
133                score,
134                recommended_mode: mode,
135            }
136        })
137        .collect();
138
139    result.sort_by(|a, b| {
140        b.score
141            .partial_cmp(&a.score)
142            .unwrap_or(std::cmp::Ordering::Equal)
143    });
144    result
145}
146
147pub fn compute_relevance_from_intent(
148    index: &ProjectIndex,
149    intent: &super::intent_engine::StructuredIntent,
150) -> Vec<RelevanceScore> {
151    use super::intent_engine::IntentScope;
152
153    let mut file_seeds: Vec<String> = Vec::new();
154    let mut extra_keywords: Vec<String> = intent.keywords.clone();
155
156    for target in &intent.targets {
157        if target.contains('.') || target.contains('/') {
158            let matched = resolve_target_to_files(index, target);
159            if matched.is_empty() {
160                extra_keywords.push(target.clone());
161            } else {
162                file_seeds.extend(matched);
163            }
164        } else {
165            let from_symbol = resolve_symbol_to_files(index, target);
166            if from_symbol.is_empty() {
167                extra_keywords.push(target.clone());
168            } else {
169                file_seeds.extend(from_symbol);
170            }
171        }
172    }
173
174    if let Some(lang) = &intent.language_hint {
175        let lang_ext = match lang.as_str() {
176            "rust" => Some("rs"),
177            "typescript" => Some("ts"),
178            "javascript" => Some("js"),
179            "python" => Some("py"),
180            "go" => Some("go"),
181            "ruby" => Some("rb"),
182            "java" => Some("java"),
183            _ => None,
184        };
185        if let Some(ext) = lang_ext {
186            if file_seeds.is_empty() {
187                for path in index.files.keys() {
188                    if path.ends_with(&format!(".{ext}")) {
189                        extra_keywords.push(
190                            std::path::Path::new(path)
191                                .file_stem()
192                                .and_then(|s| s.to_str())
193                                .unwrap_or("")
194                                .to_string(),
195                        );
196                        break;
197                    }
198                }
199            }
200        }
201    }
202
203    let mut result = compute_relevance(index, &file_seeds, &extra_keywords);
204
205    match intent.scope {
206        IntentScope::SingleFile => {
207            result.truncate(5);
208        }
209        IntentScope::MultiFile => {
210            result.truncate(15);
211        }
212        IntentScope::CrossModule | IntentScope::ProjectWide => {}
213    }
214
215    result
216}
217
218fn resolve_target_to_files(index: &ProjectIndex, target: &str) -> Vec<String> {
219    let mut matches = Vec::new();
220    for path in index.files.keys() {
221        if path.ends_with(target) || path.contains(target) {
222            matches.push(path.clone());
223        }
224    }
225    matches
226}
227
228fn resolve_symbol_to_files(index: &ProjectIndex, symbol: &str) -> Vec<String> {
229    let sym_lower = symbol.to_lowercase();
230    let mut matches = Vec::new();
231    for entry in index.symbols.values() {
232        let name_lower = entry.name.to_lowercase();
233        if (name_lower == sym_lower || name_lower.contains(&sym_lower))
234            && !matches.contains(&entry.file)
235        {
236            matches.push(entry.file.clone());
237        }
238    }
239    if matches.is_empty() {
240        for (path, file_entry) in &index.files {
241            if file_entry
242                .exports
243                .iter()
244                .any(|e| e.to_lowercase().contains(&sym_lower))
245                && !matches.contains(path)
246            {
247                matches.push(path.clone());
248            }
249        }
250    }
251    matches
252}
253
/// Map a relevance score to a context-rendering mode.
///
/// Thresholds: >= 0.8 → "full", >= 0.5 → "signatures",
/// >= 0.2 → "map", otherwise "reference".
fn recommend_mode(score: f64) -> &'static str {
    if score >= 0.8 {
        "full"
    } else if score >= 0.5 {
        "signatures"
    } else if score >= 0.2 {
        "map"
    } else {
        "reference"
    }
}
265
266/// Build adjacency with module-path → file-path resolution.
267/// Graph edges store file paths as `from` and Rust module paths as `to`
268/// (e.g. `crate::core::tokens::count_tokens`). We resolve `to` back to file
269/// paths so heat diffusion and PageRank can propagate across the graph.
270fn build_adjacency_resolved(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
271    let module_to_file = build_module_map(index);
272    let mut adj: HashMap<String, Vec<String>> = HashMap::new();
273
274    for edge in &index.edges {
275        let from = &edge.from;
276        let to_resolved = module_to_file
277            .get(&edge.to)
278            .cloned()
279            .unwrap_or_else(|| edge.to.clone());
280
281        if index.files.contains_key(from) && index.files.contains_key(&to_resolved) {
282            adj.entry(from.clone())
283                .or_default()
284                .push(to_resolved.clone());
285            adj.entry(to_resolved).or_default().push(from.clone());
286        }
287    }
288    adj
289}
290
291/// Map module/import paths to file paths using heuristics.
292/// e.g. `crate::core::tokens::count_tokens` → `rust/src/core/tokens.rs`
293fn build_module_map(index: &ProjectIndex) -> HashMap<String, String> {
294    let file_paths: Vec<&str> = index.files.keys().map(|s| s.as_str()).collect();
295    let mut mapping: HashMap<String, String> = HashMap::new();
296
297    let edge_targets: HashSet<String> = index.edges.iter().map(|e| e.to.clone()).collect();
298
299    for target in &edge_targets {
300        if index.files.contains_key(target) {
301            mapping.insert(target.clone(), target.clone());
302            continue;
303        }
304
305        if let Some(resolved) = resolve_module_to_file(target, &file_paths) {
306            mapping.insert(target.clone(), resolved);
307        }
308    }
309
310    mapping
311}
312
/// Resolve a Rust module path (e.g. `crate::core::tokens::count_tokens`)
/// to a file path: probe progressively shorter `::` prefixes against the
/// known file list, then fall back to matching the last segment as a
/// filename stem. Returns `None` when nothing matches.
fn resolve_module_to_file(module_path: &str, file_paths: &[&str]) -> Option<String> {
    // Drop leading `crate::` / `super::` qualifiers.
    let cleaned = module_path
        .trim_start_matches("crate::")
        .trim_start_matches("super::");

    let parts: Vec<&str> = cleaned.split("::").collect();

    // Longest prefix first, so `core::tokens::count_tokens` tries
    // `core/tokens/count_tokens` before `core/tokens` (stripping the
    // trailing symbol name).
    for end in (1..=parts.len()).rev() {
        let candidate = parts[..end].join("/");

        for fp in file_paths {
            let fp_normalized = fp
                .trim_start_matches("rust/src/")
                .trim_start_matches("src/");

            // Accept `<candidate>.rs` or `<candidate>/mod.rs`, either
            // relative to a stripped source root or as a path suffix.
            if fp_normalized == format!("{candidate}.rs")
                || fp_normalized == format!("{candidate}/mod.rs")
                || fp.ends_with(&format!("/{candidate}.rs"))
                || fp.ends_with(&format!("/{candidate}/mod.rs"))
            {
                return Some(fp.to_string());
            }
        }
    }

    // Fallback: match by last segment as a filename stem.
    if let Some(last) = parts.last() {
        let stem = format!("{last}.rs");
        for fp in file_paths {
            if fp.ends_with(&stem) {
                return Some(fp.to_string());
            }
        }
    }

    None
}
353
354/// Extract likely task-relevant file paths and keywords from a task description.
355pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
356    let mut files = Vec::new();
357    let mut keywords = Vec::new();
358
359    for word in task_description.split_whitespace() {
360        let clean = word.trim_matches(|c: char| {
361            !c.is_alphanumeric() && c != '.' && c != '/' && c != '_' && c != '-'
362        });
363        if clean.contains('.')
364            && (clean.contains('/')
365                || clean.ends_with(".rs")
366                || clean.ends_with(".ts")
367                || clean.ends_with(".py")
368                || clean.ends_with(".go")
369                || clean.ends_with(".js"))
370        {
371            files.push(clean.to_string());
372        } else if clean.len() >= 3 && !STOP_WORDS.contains(&clean.to_lowercase().as_str()) {
373            keywords.push(clean.to_string());
374        }
375    }
376
377    (files, keywords)
378}
379
/// Words ignored when extracting task keywords (English plus a handful of
/// common German function words).
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
388
/// Structural scoring weights, varied by task type for better IB filtering.
///
/// Each field multiplies the score of lines in its structural category;
/// `other` covers any line matching none of the structural heuristics.
struct StructuralWeights {
    // Weight for error-handling lines (Err/panic!/raise/catch/...).
    error_handling: f64,
    // Weight for definition lines (fn/struct/class/def/...).
    definition: f64,
    // Weight for control-flow lines (if/match/for/return/...).
    control_flow: f64,
    // Weight for lone closing braces.
    closing_brace: f64,
    // Weight for everything else.
    other: f64,
}
397
398impl StructuralWeights {
399    const DEFAULT: Self = Self {
400        error_handling: 1.5,
401        definition: 1.0,
402        control_flow: 0.5,
403        closing_brace: 0.15,
404        other: 0.3,
405    };
406
407    fn for_task_type(task_type: Option<super::intent_engine::TaskType>) -> Self {
408        use super::intent_engine::TaskType;
409        match task_type {
410            Some(TaskType::FixBug) => Self {
411                error_handling: 2.0,
412                definition: 0.8,
413                control_flow: 0.8,
414                closing_brace: 0.1,
415                other: 0.2,
416            },
417            Some(TaskType::Debug) => Self {
418                error_handling: 2.0,
419                definition: 0.6,
420                control_flow: 1.0,
421                closing_brace: 0.1,
422                other: 0.2,
423            },
424            Some(TaskType::Generate) => Self {
425                error_handling: 0.8,
426                definition: 1.5,
427                control_flow: 0.3,
428                closing_brace: 0.15,
429                other: 0.4,
430            },
431            Some(TaskType::Refactor) => Self {
432                error_handling: 1.0,
433                definition: 1.5,
434                control_flow: 0.6,
435                closing_brace: 0.2,
436                other: 0.3,
437            },
438            Some(TaskType::Test) => Self {
439                error_handling: 1.2,
440                definition: 1.3,
441                control_flow: 0.4,
442                closing_brace: 0.15,
443                other: 0.3,
444            },
445            Some(TaskType::Review) => Self {
446                error_handling: 1.3,
447                definition: 1.2,
448                control_flow: 0.6,
449                closing_brace: 0.15,
450                other: 0.3,
451            },
452            Some(TaskType::Explore) | None => Self::DEFAULT,
453            Some(_) => Self::DEFAULT,
454        }
455    }
456}
457
458/// Information Bottleneck filter v3 — Mutual Information scoring, QUITO-X inspired.
459///
460/// IB principle: maximize I(T;Y) (task relevance) while minimizing I(T;X) (input redundancy).
461/// v3: MI(line, task) approximated via token overlap + IDF weighting + structural importance.
462///
463/// Key changes from v2:
464///   - Mutual Information scoring: MI(line, task) = H(line) - H(line|task)
465///   - Adaptive budget allocation based on task type via TaskClassifier
466///   - Token-level IDF computed over full document for better term weighting
467///   - Maintains L-curve attention, MMR dedup, error-handling priority from v2
468pub fn information_bottleneck_filter(
469    content: &str,
470    task_keywords: &[String],
471    budget_ratio: f64,
472) -> String {
473    information_bottleneck_filter_typed(content, task_keywords, budget_ratio, None)
474}
475
476/// Task-type-aware IB filter. Uses `TaskType` to adjust structural weights.
477pub fn information_bottleneck_filter_typed(
478    content: &str,
479    task_keywords: &[String],
480    budget_ratio: f64,
481    task_type: Option<super::intent_engine::TaskType>,
482) -> String {
483    let lines: Vec<&str> = content.lines().collect();
484    if lines.is_empty() {
485        return String::new();
486    }
487
488    let n = lines.len();
489    let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
490    let attention = LearnedAttention::with_defaults();
491
492    let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
493    for line in &lines {
494        for token in line.split_whitespace() {
495            *global_token_freq.entry(token).or_insert(0) += 1;
496        }
497    }
498    let total_unique = global_token_freq.len().max(1) as f64;
499    let total_lines = n.max(1) as f64;
500
501    let task_token_set: HashSet<String> = kw_lower
502        .iter()
503        .flat_map(|kw| kw.split(|c: char| !c.is_alphanumeric()).map(String::from))
504        .filter(|t| t.len() >= 2)
505        .collect();
506
507    let effective_ratio = if !task_token_set.is_empty() {
508        adaptive_ib_budget(content, budget_ratio)
509    } else {
510        budget_ratio
511    };
512
513    let weights = StructuralWeights::for_task_type(task_type);
514
515    let mut scored_lines: Vec<(usize, &str, f64)> = lines
516        .iter()
517        .enumerate()
518        .map(|(i, line)| {
519            let trimmed = line.trim();
520            if trimmed.is_empty() {
521                return (i, *line, 0.05);
522            }
523
524            let line_lower = trimmed.to_lowercase();
525            let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
526            let line_token_count = line_tokens.len().max(1) as f64;
527
528            let mi_score = if task_token_set.is_empty() {
529                0.0
530            } else {
531                let line_token_set: HashSet<String> =
532                    line_tokens.iter().map(|t| t.to_lowercase()).collect();
533                let overlap: f64 = line_token_set
534                    .iter()
535                    .filter(|t| task_token_set.iter().any(|kw| t.contains(kw.as_str())))
536                    .map(|t| {
537                        let freq = *global_token_freq.get(t.as_str()).unwrap_or(&1) as f64;
538                        (total_lines / freq).ln().max(0.1)
539                    })
540                    .sum();
541                overlap / line_token_count
542            };
543
544            let keyword_hits: f64 = kw_lower
545                .iter()
546                .filter(|kw| line_lower.contains(kw.as_str()))
547                .count() as f64;
548
549            let structural = if is_error_handling(trimmed) {
550                weights.error_handling
551            } else if is_definition_line(trimmed) {
552                weights.definition
553            } else if is_control_flow(trimmed) {
554                weights.control_flow
555            } else if is_closing_brace(trimmed) {
556                weights.closing_brace
557            } else {
558                weights.other
559            };
560            let relevance = mi_score * 0.4 + keyword_hits * 0.3 + structural;
561
562            let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
563            let token_diversity = unique_in_line / line_token_count;
564
565            let avg_idf: f64 = if line_tokens.is_empty() {
566                0.0
567            } else {
568                line_tokens
569                    .iter()
570                    .map(|t| {
571                        let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
572                        (total_unique / freq).ln().max(0.0)
573                    })
574                    .sum::<f64>()
575                    / line_token_count
576            };
577            let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);
578
579            let pos = i as f64 / n.max(1) as f64;
580            let attn_weight = attention.weight(pos);
581
582            let score = (relevance * 0.6 + 0.05)
583                * (information * 0.25 + 0.05)
584                * (attn_weight * 0.15 + 0.05);
585
586            (i, *line, score)
587        })
588        .collect();
589
590    let budget = ((n as f64) * effective_ratio).ceil() as usize;
591
592    scored_lines.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
593
594    let selected = mmr_select(&scored_lines, budget, 0.3);
595
596    let mut output_lines: Vec<&str> = Vec::with_capacity(budget + 1);
597
598    if !kw_lower.is_empty() {
599        output_lines.push("");
600    }
601
602    for (_, line, _) in &selected {
603        output_lines.push(line);
604    }
605
606    if !kw_lower.is_empty() {
607        let summary = format!("[task: {}]", task_keywords.join(", "));
608        let mut result = summary;
609        result.push('\n');
610        result.push_str(&output_lines[1..].to_vec().join("\n"));
611        return result;
612    }
613
614    output_lines.join("\n")
615}
616
/// Maximum Marginal Relevance selection — greedy selection that penalizes
/// redundancy with already-selected lines using token-set Jaccard similarity.
///
/// MMR(i) = relevance(i) - lambda * max_{j in S} jaccard(i, j)
///
/// `candidates` should already be sorted by descending relevance; the first
/// candidate is always taken.
fn mmr_select<'a>(
    candidates: &[(usize, &'a str, f64)],
    budget: usize,
    lambda: f64,
) -> Vec<(usize, &'a str, f64)> {
    if candidates.is_empty() || budget == 0 {
        return Vec::new();
    }

    let mut selected: Vec<(usize, &'a str, f64)> = Vec::with_capacity(budget);
    let mut remaining: Vec<(usize, &'a str, f64)> = candidates.to_vec();

    // Always take the top-scored line first.
    selected.push(remaining.remove(0));

    while selected.len() < budget && !remaining.is_empty() {
        let mut best_idx = 0;
        let mut best_mmr = f64::NEG_INFINITY;

        for (i, &(_, cand_line, cand_score)) in remaining.iter().enumerate() {
            let cand_tokens: HashSet<&str> = cand_line.split_whitespace().collect();
            if cand_tokens.is_empty() {
                // Token-less (blank) lines carry no redundancy: raw score.
                if cand_score > best_mmr {
                    best_mmr = cand_score;
                    best_idx = i;
                }
                continue;
            }

            // Highest Jaccard similarity against anything already selected.
            let max_sim = selected
                .iter()
                .map(|&(_, sel_line, _)| {
                    let sel_tokens: HashSet<&str> = sel_line.split_whitespace().collect();
                    if sel_tokens.is_empty() {
                        return 0.0;
                    }
                    let inter = cand_tokens.intersection(&sel_tokens).count();
                    let union = cand_tokens.union(&sel_tokens).count();
                    if union == 0 {
                        0.0
                    } else {
                        inter as f64 / union as f64
                    }
                })
                .fold(0.0_f64, f64::max);

            let mmr = cand_score - lambda * max_sim;
            if mmr > best_mmr {
                best_mmr = mmr;
                best_idx = i;
            }
        }

        selected.push(remaining.remove(best_idx));
    }

    selected
}
679
/// Heuristic: does this (pre-trimmed) line look like error handling?
/// Covers Rust (`Err`, `?;`, `panic!`, `map_err`), Python (`raise`/`except`),
/// and JS/Java-style (`throw`/`catch`) constructs.
fn is_error_handling(line: &str) -> bool {
    line.starts_with("return Err(")
        || line.starts_with("Err(")
        || line.starts_with("bail!(")
        || line.starts_with("anyhow::bail!")
        || line.contains(".map_err(")
        || line.contains("unwrap()")
        || line.contains("expect(\"")
        || line.starts_with("raise ")
        || line.starts_with("throw ")
        || line.starts_with("catch ")
        || line.starts_with("except ")
        || line.starts_with("try ")
        // `?;` only counts outside line comments.
        || (line.contains("?;") && !line.starts_with("//"))
        || line.starts_with("panic!(")
        || line.contains("Error::")
        || line.contains("error!")
}
698
/// Compute an adaptive IB budget ratio based on content characteristics.
///
/// Highly repetitive content → more aggressive filtering (lower ratio);
/// high-entropy diverse content → more conservative (higher ratio).
/// Content under 10 lines is never filtered (ratio 1.0); the result is
/// clamped to [0.2, 1.0], and the reduction is at most 30% of `base_ratio`.
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() < 10 {
        return 1.0;
    }

    let mut token_freq: HashMap<&str, usize> = HashMap::new();
    let mut total_tokens = 0usize;
    for line in &lines {
        for token in line.split_whitespace() {
            *token_freq.entry(token).or_insert(0) += 1;
            total_tokens += 1;
        }
    }

    if total_tokens == 0 {
        return base_ratio;
    }

    // Fraction of duplicated tokens drives the reduction.
    let unique_ratio = token_freq.len() as f64 / total_tokens as f64;
    let repetition_factor = 1.0 - unique_ratio;

    (base_ratio * (1.0 - repetition_factor * 0.3)).clamp(0.2, 1.0)
}
726
/// Heuristic: does this line begin a named definition?
/// Covers Rust, TypeScript/JavaScript, Python, and Go prefixes.
/// (The original also tested the untrimmed line, which is redundant:
/// a raw-prefix match implies a trimmed-prefix match.)
fn is_definition_line(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];
    let trimmed = line.trim_start();
    PREFIXES.iter().any(|p| trimmed.starts_with(p))
}
761
/// Heuristic: does this line start a control-flow construct
/// (branch, loop, return, break/continue, yield, await)?
fn is_control_flow(line: &str) -> bool {
    let trimmed = line.trim();
    trimmed.starts_with("if ")
        || trimmed.starts_with("else ")
        || trimmed.starts_with("match ")
        || trimmed.starts_with("for ")
        || trimmed.starts_with("while ")
        || trimmed.starts_with("return ")
        || trimmed.starts_with("break")
        || trimmed.starts_with("continue")
        || trimmed.starts_with("yield")
        || trimmed.starts_with("await ")
}
775
/// True for lines that consist solely of a closing brace,
/// optionally followed by `)` and/or `;`.
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
780
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_task_finds_files_and_keywords() {
        let (files, keywords) =
            parse_task_hints("Fix the authentication bug in src/auth.rs and update tests");
        assert!(files.iter().any(|f| f.contains("auth.rs")));
        assert!(keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication")));
    }

    #[test]
    fn recommend_mode_by_score() {
        assert_eq!(recommend_mode(1.0), "full");
        assert_eq!(recommend_mode(0.6), "signatures");
        assert_eq!(recommend_mode(0.3), "map");
        assert_eq!(recommend_mode(0.1), "reference");
    }

    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n    let x = 42;\n    // boring comment\n    println!(x);\n}\n";
        let result = information_bottleneck_filter(content, &["main".to_string()], 0.6);
        assert!(result.contains("fn main"), "definitions must be preserved");
        assert!(result.contains("[task: main]"), "should have task summary");
    }

    #[test]
    fn info_bottleneck_error_handling_priority() {
        let content = "fn validate() {\n    let data = parse()?;\n    return Err(\"invalid\");\n    let x = 1;\n    let y = 2;\n}\n";
        let result = information_bottleneck_filter(content, &["validate".to_string()], 0.5);
        assert!(
            result.contains("return Err"),
            "error handling should survive filtering"
        );
    }

    #[test]
    fn info_bottleneck_score_sorted() {
        let content = "fn important() {\n    let x = 1;\n    let y = 2;\n    let z = 3;\n}\n}\n";
        let result = information_bottleneck_filter(content, &[], 0.6);
        let lines: Vec<&str> = result.lines().collect();
        let def_pos = lines.iter().position(|l| l.contains("fn important"));
        let brace_pos = lines.iter().position(|l| l.trim() == "}");
        if let (Some(d), Some(b)) = (def_pos, brace_pos) {
            assert!(
                d < b,
                "definitions should appear before closing braces in score-sorted output"
            );
        }
    }

    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect::<Vec<_>>()
            .join("\n");
        let budget_rep = super::adaptive_ib_budget(&repetitive, 0.7);
        let budget_div = super::adaptive_ib_budget(&diverse, 0.7);
        assert!(
            budget_rep < budget_div,
            "repetitive content should get lower budget"
        );
    }

    #[test]
    fn ib_fixbug_type_boosts_error_handling() {
        use crate::core::intent_engine::TaskType;

        let content = "\
fn process() {
    let data = fetch_data();
    let parsed = parse(data);
    return Err(\"invalid input\");
    let x = 1;
    let y = 2;
    let z = 3;
}";
        let kw = vec!["process".to_string()];
        let default_result = information_bottleneck_filter(content, &kw, 0.4);
        let fixbug_result =
            information_bottleneck_filter_typed(content, &kw, 0.4, Some(TaskType::FixBug));
        assert!(
            fixbug_result.contains("return Err"),
            "FixBug should preserve error handling"
        );
        let _ = default_result;
    }

    #[test]
    fn ib_generate_type_boosts_definitions() {
        use crate::core::intent_engine::TaskType;

        let content = "\
fn main() {
    let x = 1;
}
pub struct Config {
    pub name: String,
}
fn helper() {
    let y = 2;
}";
        let kw = vec!["config".to_string()];
        let gen_result =
            information_bottleneck_filter_typed(content, &kw, 0.4, Some(TaskType::Generate));
        assert!(
            gen_result.contains("pub struct Config"),
            "Generate should prioritize definitions"
        );
    }

    #[test]
    fn structural_weights_default_matches_none() {
        let w = StructuralWeights::for_task_type(None);
        assert!((w.error_handling - 1.5).abs() < f64::EPSILON);
        assert!((w.definition - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn resolve_target_to_files_matches_suffix() {
        let mut index = ProjectIndex::new("/tmp/test");
        index.files.insert(
            "src/core/session.rs".to_string(),
            crate::core::graph_index::FileEntry {
                path: "src/core/session.rs".to_string(),
                hash: String::new(),
                language: "rust".to_string(),
                line_count: 100,
                token_count: 500,
                exports: vec!["SessionState".to_string()],
                summary: String::new(),
            },
        );
        let result = resolve_target_to_files(&index, "session.rs");
        assert_eq!(result, vec!["src/core/session.rs"]);
    }

    #[test]
    fn resolve_symbol_finds_exported_name() {
        let mut index = ProjectIndex::new("/tmp/test");
        index.files.insert(
            "src/config.rs".to_string(),
            crate::core::graph_index::FileEntry {
                path: "src/config.rs".to_string(),
                hash: String::new(),
                language: "rust".to_string(),
                line_count: 50,
                token_count: 200,
                exports: vec!["Config".to_string(), "load_config".to_string()],
                summary: String::new(),
            },
        );
        let result = resolve_symbol_to_files(&index, "Config");
        assert!(result.contains(&"src/config.rs".to_string()));
    }

    #[test]
    fn intent_to_relevance_uses_targets_as_seeds() {
        use crate::core::intent_engine::StructuredIntent;

        let mut index = ProjectIndex::new("/tmp/test");
        index.files.insert(
            "src/auth.rs".to_string(),
            crate::core::graph_index::FileEntry {
                path: "src/auth.rs".to_string(),
                hash: String::new(),
                language: "rust".to_string(),
                line_count: 100,
                token_count: 500,
                exports: vec!["authenticate".to_string()],
                summary: String::new(),
            },
        );
        index.files.insert(
            "src/db.rs".to_string(),
            crate::core::graph_index::FileEntry {
                path: "src/db.rs".to_string(),
                hash: String::new(),
                language: "rust".to_string(),
                line_count: 100,
                token_count: 500,
                exports: vec!["query".to_string()],
                summary: String::new(),
            },
        );
        index.edges.push(crate::core::graph_index::IndexEdge {
            from: "src/auth.rs".to_string(),
            to: "src/db.rs".to_string(),
            kind: "imports".to_string(),
        });

        let intent = StructuredIntent::from_query("fix the auth bug in auth.rs");
        let scores = compute_relevance_from_intent(&index, &intent);

        assert!(!scores.is_empty());
        let auth_score = scores.iter().find(|s| s.path == "src/auth.rs");
        let db_score = scores.iter().find(|s| s.path == "src/db.rs");
        assert!(auth_score.is_some(), "auth.rs should be in results");
        if let (Some(a), Some(d)) = (auth_score, db_score) {
            assert!(
                a.score >= d.score,
                "auth.rs ({}) should score >= db.rs ({})",
                a.score,
                d.score
            );
        }
    }
}