1use std::collections::{HashMap, HashSet, VecDeque};
2
3use super::graph_index::ProjectIndex;
4
5use super::attention_model::positional_attention;
6
/// A per-file relevance verdict produced by `compute_relevance`.
#[derive(Debug, Clone)]
pub struct RelevanceScore {
    /// Project-relative file path, as keyed in the `ProjectIndex`.
    pub path: String,
    /// Relevance in [0.0, 1.0]; task-seed files start at 1.0 and keyword
    /// boosts are capped so the total never exceeds 1.0.
    pub score: f64,
    /// Suggested context-inclusion mode derived from `score`:
    /// "full", "signatures", "map", or "reference".
    pub recommended_mode: &'static str,
}
13
14pub fn compute_relevance(
15 index: &ProjectIndex,
16 task_files: &[String],
17 task_keywords: &[String],
18) -> Vec<RelevanceScore> {
19 let mut scores: HashMap<String, f64> = HashMap::new();
20
21 for f in task_files {
23 scores.insert(f.clone(), 1.0);
24 }
25
26 let adj = build_adjacency(index);
28 for seed in task_files {
29 let mut visited: HashSet<String> = HashSet::new();
30 let mut queue: VecDeque<(String, usize)> = VecDeque::new();
31 queue.push_back((seed.clone(), 0));
32 visited.insert(seed.clone());
33
34 while let Some((node, depth)) = queue.pop_front() {
35 if depth > 4 {
36 continue;
37 }
38 let decay = 1.0 / (1.0 + depth as f64).powi(2); let entry = scores.entry(node.clone()).or_insert(0.0);
40 *entry = entry.max(decay);
41
42 if let Some(neighbors) = adj.get(&node) {
43 for neighbor in neighbors {
44 if !visited.contains(neighbor) {
45 visited.insert(neighbor.clone());
46 queue.push_back((neighbor.clone(), depth + 1));
47 }
48 }
49 }
50 }
51 }
52
53 if !task_keywords.is_empty() {
55 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
56 for (file_path, file_entry) in &index.files {
57 let path_lower = file_path.to_lowercase();
58 let mut keyword_hits = 0;
59 for kw in &kw_lower {
60 if path_lower.contains(kw) {
61 keyword_hits += 1;
62 }
63 for export in &file_entry.exports {
64 if export.to_lowercase().contains(kw) {
65 keyword_hits += 1;
66 }
67 }
68 }
69 if keyword_hits > 0 {
70 let boost = (keyword_hits as f64 * 0.15).min(0.6);
71 let entry = scores.entry(file_path.clone()).or_insert(0.0);
72 *entry = (*entry + boost).min(1.0);
73 }
74 }
75 }
76
77 let mut result: Vec<RelevanceScore> = scores
78 .into_iter()
79 .map(|(path, score)| {
80 let mode = recommend_mode(score);
81 RelevanceScore {
82 path,
83 score,
84 recommended_mode: mode,
85 }
86 })
87 .collect();
88
89 result.sort_by(|a, b| {
90 b.score
91 .partial_cmp(&a.score)
92 .unwrap_or(std::cmp::Ordering::Equal)
93 });
94 result
95}
96
/// Maps a relevance score onto a context-inclusion mode.
///
/// Tiers, checked highest-first: >= 0.8 "full", >= 0.5 "signatures",
/// >= 0.2 "map", otherwise "reference".
fn recommend_mode(score: f64) -> &'static str {
    let tiers: [(f64, &'static str); 3] = [(0.8, "full"), (0.5, "signatures"), (0.2, "map")];
    tiers
        .iter()
        .find(|(threshold, _)| score >= *threshold)
        .map(|(_, mode)| *mode)
        .unwrap_or("reference")
}
108
109fn build_adjacency(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
110 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
111 for edge in &index.edges {
112 adj.entry(edge.from.clone())
113 .or_default()
114 .push(edge.to.clone());
115 adj.entry(edge.to.clone())
116 .or_default()
117 .push(edge.from.clone());
118 }
119 adj
120}
121
/// Extracts file-path hints and keyword hints from a free-form task
/// description.
///
/// Returns `(files, keywords)`. A token counts as a file path when it
/// contains a dot plus either a path separator or a recognized source
/// extension; remaining tokens of length >= 3 become keywords unless they
/// are stop words.
pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
    let mut files = Vec::new();
    let mut keywords = Vec::new();

    for word in task_description.split_whitespace() {
        // Strip surrounding punctuation while keeping characters that are
        // legal inside paths and identifiers.
        let clean = word.trim_matches(|c: char| {
            !c.is_alphanumeric() && c != '.' && c != '/' && c != '_' && c != '-'
        });
        // '.' survives the trim above because it is meaningful inside file
        // names, so a sentence-ending period (e.g. "fix src/auth.rs.")
        // would otherwise stay attached and defeat extension matching.
        let clean = clean.trim_end_matches('.');
        if clean.contains('.')
            && (clean.contains('/')
                || clean.ends_with(".rs")
                || clean.ends_with(".ts")
                || clean.ends_with(".py")
                || clean.ends_with(".go")
                || clean.ends_with(".js"))
        {
            files.push(clean.to_string());
        } else if clean.len() >= 3 && !STOP_WORDS.contains(&clean.to_lowercase().as_str()) {
            keywords.push(clean.to_string());
        }
    }

    (files, keywords)
}

/// English and German filler words excluded from keyword extraction.
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
156
157pub fn information_bottleneck_filter(
167 content: &str,
168 task_keywords: &[String],
169 budget_ratio: f64,
170) -> String {
171 let lines: Vec<&str> = content.lines().collect();
172 if lines.is_empty() {
173 return String::new();
174 }
175
176 let n = lines.len();
177 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
178
179 let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
180 for line in &lines {
181 for token in line.split_whitespace() {
182 *global_token_freq.entry(token).or_insert(0) += 1;
183 }
184 }
185 let total_unique = global_token_freq.len().max(1) as f64;
186
187 let mut scored_lines: Vec<(usize, &str, f64)> = lines
188 .iter()
189 .enumerate()
190 .map(|(i, line)| {
191 let trimmed = line.trim();
192 if trimmed.is_empty() {
193 return (i, *line, 0.05);
194 }
195
196 let line_lower = trimmed.to_lowercase();
198 let keyword_hits: f64 = kw_lower
199 .iter()
200 .filter(|kw| line_lower.contains(kw.as_str()))
201 .count() as f64;
202 let structural = if is_definition_line(trimmed) {
203 1.0
204 } else if is_control_flow(trimmed) {
205 0.5
206 } else if is_closing_brace(trimmed) {
207 0.15
208 } else {
209 0.3
210 };
211 let relevance = keyword_hits * 0.5 + structural;
212
213 let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
215 let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
216 let line_token_count = line_tokens.len().max(1) as f64;
217 let token_diversity = unique_in_line / line_token_count;
218
219 let avg_idf: f64 = if line_tokens.is_empty() {
220 0.0
221 } else {
222 line_tokens
223 .iter()
224 .map(|t| {
225 let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
226 (total_unique / freq).ln().max(0.0)
227 })
228 .sum::<f64>()
229 / line_token_count
230 };
231 let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);
232
233 let pos = i as f64 / n.max(1) as f64;
235 let attention = positional_attention(pos, 0.9, 0.3, 0.85);
236
237 let score =
241 (relevance * 0.6 + 0.05) * (information * 0.25 + 0.05) * (attention * 0.15 + 0.05);
242
243 (i, *line, score)
244 })
245 .collect();
246
247 let budget = ((n as f64) * budget_ratio).ceil() as usize;
248
249 let mut by_score = scored_lines.clone();
250 by_score.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
251
252 let cutoff_score = if budget < by_score.len() {
253 by_score[budget].2
254 } else {
255 0.0
256 };
257
258 scored_lines.retain(|(_, _, score)| *score >= cutoff_score);
259
260 scored_lines
261 .iter()
262 .map(|(_, line, _)| *line)
263 .collect::<Vec<&str>>()
264 .join("\n")
265}
266
/// Adapts a base budget ratio to how repetitive `content` is.
///
/// Very short inputs (< 10 lines) always get the full budget (1.0).
/// Otherwise the ratio shrinks by up to 30% as token repetition grows,
/// clamped to [0.2, 1.0]. Whitespace-only content returns `base_ratio`
/// unchanged.
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    // Tiny files are not worth compressing at all.
    if content.lines().count() < 10 {
        return 1.0;
    }

    let mut distinct: HashSet<&str> = HashSet::new();
    let mut total_tokens = 0usize;
    for token in content.lines().flat_map(str::split_whitespace) {
        distinct.insert(token);
        total_tokens += 1;
    }

    if total_tokens == 0 {
        return base_ratio;
    }

    // High repetition (low unique ratio) shrinks the budget, by at most 30%.
    let repetition_factor = 1.0 - distinct.len() as f64 / total_tokens as f64;
    (base_ratio * (1.0 - repetition_factor * 0.3)).clamp(0.2, 1.0)
}
294
/// True when `line` (after any leading whitespace) begins a named
/// definition in Rust, TypeScript/JavaScript, Python, or Go.
fn is_definition_line(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];
    // Trim once up front: every prefix starts with a non-whitespace
    // character, so a match on the raw line implies a match on the trimmed
    // line — one check suffices, and we avoid re-trimming per prefix.
    let trimmed = line.trim_start();
    PREFIXES.iter().any(|prefix| trimmed.starts_with(prefix))
}
329
/// Heuristic: does the (trimmed) line begin with a control-flow keyword?
fn is_control_flow(line: &str) -> bool {
    // Keywords that take an expression keep a trailing space so identifiers
    // like "iffy" don't match; bare statements (break/continue/yield) don't.
    const STARTERS: &[&str] = &[
        "if ", "else ", "match ", "for ", "while ", "return ", "break", "continue", "yield",
        "await ",
    ];
    let trimmed = line.trim();
    STARTERS.iter().any(|starter| trimmed.starts_with(starter))
}
343
/// True for lines that contain nothing but a closing delimiter sequence.
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
348
#[cfg(test)]
mod tests {
    use super::*;

    // A task mentioning a path with an extension should yield it as a file
    // hint, while content words survive as keywords.
    #[test]
    fn parse_task_finds_files_and_keywords() {
        let (files, keywords) =
            parse_task_hints("Fix the authentication bug in src/auth.rs and update tests");
        assert!(files.iter().any(|f| f.contains("auth.rs")));
        assert!(keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication")));
    }

    // Spot-checks one score inside each of the four mode tiers.
    #[test]
    fn recommend_mode_by_score() {
        assert_eq!(recommend_mode(1.0), "full");
        assert_eq!(recommend_mode(0.6), "signatures");
        assert_eq!(recommend_mode(0.3), "map");
        assert_eq!(recommend_mode(0.1), "reference");
    }

    // A definition line matching a task keyword scores highest, so it must
    // survive even a reduced (0.6) line budget.
    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n let x = 42;\n // boring comment\n println!(x);\n}\n";
        let result = information_bottleneck_filter(content, &["main".to_string()], 0.6);
        assert!(result.contains("fn main"));
    }

    // Highly repetitive content should be compressed harder (lower budget)
    // than token-diverse content of the same length.
    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect::<Vec<_>>()
            .join("\n");
        let budget_rep = super::adaptive_ib_budget(&repetitive, 0.7);
        let budget_div = super::adaptive_ib_budget(&diverse, 0.7);
        assert!(
            budget_rep < budget_div,
            "repetitive content should get lower budget"
        );
    }
}