1use std::collections::{HashMap, HashSet, VecDeque};
2
3use super::graph_index::ProjectIndex;
4
5use super::attention_model::positional_attention;
6
/// Relevance verdict for a single file, produced by `compute_relevance`.
#[derive(Debug, Clone)]
pub struct RelevanceScore {
    // File path as it appears in the project index or in the task hints.
    pub path: String,
    // Relevance in [0.0, 1.0]; files named directly by the task start at 1.0,
    // graph neighbors decay with distance, keyword matches add a capped boost.
    pub score: f64,
    // Context-rendering mode chosen by `recommend_mode` from the score:
    // "full", "signatures", "map", or "reference".
    pub recommended_mode: &'static str,
}
13
14pub fn compute_relevance(
15 index: &ProjectIndex,
16 task_files: &[String],
17 task_keywords: &[String],
18) -> Vec<RelevanceScore> {
19 let mut scores: HashMap<String, f64> = HashMap::new();
20
21 for f in task_files {
23 scores.insert(f.clone(), 1.0);
24 }
25
26 let adj = build_adjacency(index);
28 for seed in task_files {
29 let mut visited: HashSet<String> = HashSet::new();
30 let mut queue: VecDeque<(String, usize)> = VecDeque::new();
31 queue.push_back((seed.clone(), 0));
32 visited.insert(seed.clone());
33
34 while let Some((node, depth)) = queue.pop_front() {
35 if depth > 4 {
36 continue;
37 }
38 let decay = 1.0 / (1.0 + depth as f64).powi(2); let entry = scores.entry(node.clone()).or_insert(0.0);
40 *entry = entry.max(decay);
41
42 if let Some(neighbors) = adj.get(&node) {
43 for neighbor in neighbors {
44 if !visited.contains(neighbor) {
45 visited.insert(neighbor.clone());
46 queue.push_back((neighbor.clone(), depth + 1));
47 }
48 }
49 }
50 }
51 }
52
53 if !task_keywords.is_empty() {
55 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
56 for (file_path, file_entry) in &index.files {
57 let path_lower = file_path.to_lowercase();
58 let mut keyword_hits = 0;
59 for kw in &kw_lower {
60 if path_lower.contains(kw) {
61 keyword_hits += 1;
62 }
63 for export in &file_entry.exports {
64 if export.to_lowercase().contains(kw) {
65 keyword_hits += 1;
66 }
67 }
68 }
69 if keyword_hits > 0 {
70 let boost = (keyword_hits as f64 * 0.15).min(0.6);
71 let entry = scores.entry(file_path.clone()).or_insert(0.0);
72 *entry = (*entry + boost).min(1.0);
73 }
74 }
75 }
76
77 let mut result: Vec<RelevanceScore> = scores
78 .into_iter()
79 .map(|(path, score)| {
80 let mode = recommend_mode(score);
81 RelevanceScore {
82 path,
83 score,
84 recommended_mode: mode,
85 }
86 })
87 .collect();
88
89 result.sort_by(|a, b| {
90 b.score
91 .partial_cmp(&a.score)
92 .unwrap_or(std::cmp::Ordering::Equal)
93 });
94 result
95}
96
/// Maps a relevance score onto a context-rendering mode.
///
/// Thresholds: >= 0.8 → "full", >= 0.5 → "signatures", >= 0.2 → "map",
/// anything lower → "reference".
fn recommend_mode(score: f64) -> &'static str {
    match score {
        s if s >= 0.8 => "full",
        s if s >= 0.5 => "signatures",
        s if s >= 0.2 => "map",
        _ => "reference",
    }
}
108
109fn build_adjacency(index: &ProjectIndex) -> HashMap<String, Vec<String>> {
110 let mut adj: HashMap<String, Vec<String>> = HashMap::new();
111 for edge in &index.edges {
112 adj.entry(edge.from.clone())
113 .or_default()
114 .push(edge.to.clone());
115 adj.entry(edge.to.clone())
116 .or_default()
117 .push(edge.from.clone());
118 }
119 adj
120}
121
/// Splits a free-form task description into (file hints, keyword hints).
///
/// A whitespace-separated token (stripped of surrounding punctuation) is a
/// file hint when it contains a '.' and either a '/' or a known source-file
/// extension. Everything else of length >= 3 that is not a stop word becomes
/// a keyword, preserving original case.
pub fn parse_task_hints(task_description: &str) -> (Vec<String>, Vec<String>) {
    let mut file_hints = Vec::new();
    let mut keyword_hints = Vec::new();

    // Characters allowed inside a path-like token; everything else is trimmed.
    let is_path_char = |c: char| c.is_alphanumeric() || matches!(c, '.' | '/' | '_' | '-');
    const SOURCE_EXTS: [&str; 5] = [".rs", ".ts", ".py", ".go", ".js"];

    for raw in task_description.split_whitespace() {
        let token = raw.trim_matches(|c: char| !is_path_char(c));
        let looks_like_file = token.contains('.')
            && (token.contains('/') || SOURCE_EXTS.iter().any(|ext| token.ends_with(ext)));
        if looks_like_file {
            file_hints.push(token.to_string());
        } else if token.len() >= 3 && !STOP_WORDS.contains(&token.to_lowercase().as_str()) {
            keyword_hints.push(token.to_string());
        }
    }

    (file_hints, keyword_hints)
}

// Common filler words (English plus a few German ones) that carry no signal
// as task keywords; compared against lowercased tokens.
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "that", "this", "with", "from", "have", "has", "was", "are", "been",
    "not", "but", "all", "can", "had", "her", "one", "our", "out", "you", "its", "will", "each",
    "make", "like", "fix", "add", "use", "get", "set", "run", "new", "old", "should", "would",
    "could", "into", "also", "than", "them", "then", "when", "just", "only", "very", "some",
    "more", "other", "nach", "und", "die", "der", "das", "ist", "ein", "eine", "nicht", "auf",
    "mit",
];
156
157pub fn information_bottleneck_filter(
167 content: &str,
168 task_keywords: &[String],
169 budget_ratio: f64,
170) -> String {
171 let lines: Vec<&str> = content.lines().collect();
172 if lines.is_empty() {
173 return String::new();
174 }
175
176 let n = lines.len();
177 let kw_lower: Vec<String> = task_keywords.iter().map(|k| k.to_lowercase()).collect();
178
179 let mut global_token_freq: HashMap<&str, usize> = HashMap::new();
180 for line in &lines {
181 for token in line.split_whitespace() {
182 *global_token_freq.entry(token).or_insert(0) += 1;
183 }
184 }
185 let total_unique = global_token_freq.len().max(1) as f64;
186
187 let mut scored_lines: Vec<(usize, &str, f64)> = lines
188 .iter()
189 .enumerate()
190 .map(|(i, line)| {
191 let trimmed = line.trim();
192 if trimmed.is_empty() {
193 return (i, *line, 0.05);
194 }
195
196 let line_lower = trimmed.to_lowercase();
198 let keyword_hits: f64 = kw_lower
199 .iter()
200 .filter(|kw| line_lower.contains(kw.as_str()))
201 .count() as f64;
202 let structural = if is_definition_line(trimmed) {
203 1.0
204 } else if is_control_flow(trimmed) {
205 0.5
206 } else if is_closing_brace(trimmed) {
207 0.15
208 } else {
209 0.3
210 };
211 let relevance = keyword_hits * 0.5 + structural;
212
213 let line_tokens: Vec<&str> = trimmed.split_whitespace().collect();
215 let unique_in_line = line_tokens.iter().collect::<HashSet<_>>().len() as f64;
216 let line_token_count = line_tokens.len().max(1) as f64;
217 let token_diversity = unique_in_line / line_token_count;
218
219 let avg_idf: f64 = if line_tokens.is_empty() {
220 0.0
221 } else {
222 line_tokens
223 .iter()
224 .map(|t| {
225 let freq = *global_token_freq.get(t).unwrap_or(&1) as f64;
226 (total_unique / freq).ln().max(0.0)
227 })
228 .sum::<f64>()
229 / line_token_count
230 };
231 let information = (token_diversity * 0.4 + (avg_idf.min(3.0) / 3.0) * 0.6).min(1.0);
232
233 let pos = i as f64 / n.max(1) as f64;
235 let attention = positional_attention(pos, 0.9, 0.3, 0.85);
236
237 let score = (relevance + 0.15) * (information * 0.3 + 0.7) * (attention * 0.3 + 0.7);
239
240 (i, *line, score)
241 })
242 .collect();
243
244 let budget = ((n as f64) * budget_ratio).ceil() as usize;
245
246 let mut by_score = scored_lines.clone();
247 by_score.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
248
249 let cutoff_score = if budget < by_score.len() {
250 by_score[budget].2
251 } else {
252 0.0
253 };
254
255 scored_lines.retain(|(_, _, score)| *score >= cutoff_score);
256
257 scored_lines
258 .iter()
259 .map(|(_, line, _)| *line)
260 .collect::<Vec<&str>>()
261 .join("\n")
262}
263
/// Adapts a bottleneck budget ratio to how repetitive `content` is.
///
/// Tiny inputs (fewer than 10 lines) are kept whole. Otherwise the unique /
/// total token ratio measures repetition, and the base ratio is shrunk by up
/// to 30% for highly repetitive content, clamped into [0.2, 1.0].
pub fn adaptive_ib_budget(content: &str, base_ratio: f64) -> f64 {
    if content.lines().count() < 10 {
        return 1.0;
    }

    // Only the unique-token count matters, so a set suffices here.
    let mut unique: HashSet<&str> = HashSet::new();
    let mut total_tokens = 0usize;
    for token in content.lines().flat_map(|line| line.split_whitespace()) {
        unique.insert(token);
        total_tokens += 1;
    }

    if total_tokens == 0 {
        return base_ratio;
    }

    let unique_ratio = unique.len() as f64 / total_tokens as f64;
    let repetition_factor = 1.0 - unique_ratio;

    (base_ratio * (1.0 - repetition_factor * 0.3)).clamp(0.2, 1.0)
}
291
/// Returns true when `line` (after leading whitespace) begins a named
/// definition in Rust, TypeScript/JavaScript, Python, or Go.
///
/// Fix: the original tested `line.starts_with(p) || line.trim_start()
/// .starts_with(p)`. No prefix begins with whitespace, so whenever the first
/// disjunct held the second held too — the redundant check is dropped.
fn is_definition_line(line: &str) -> bool {
    const PREFIXES: [&str; 27] = [
        "fn ",
        "pub fn ",
        "async fn ",
        "pub async fn ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "type ",
        "pub type ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "class ",
        "export class ",
        "interface ",
        "export interface ",
        "function ",
        "export function ",
        "async function ",
        "def ",
        "async def ",
        "func ",
    ];
    let head = line.trim_start();
    PREFIXES.iter().any(|p| head.starts_with(p))
}
326
/// Heuristic: does the trimmed line start with a control-flow keyword?
///
/// Keywords that normally take an expression require a trailing space;
/// `break` / `continue` / `yield` match bare (e.g. "break;").
fn is_control_flow(line: &str) -> bool {
    let head = line.trim();
    [
        "if ", "else ", "match ", "for ", "while ", "return ",
        "break", "continue", "yield", "await ",
    ]
    .iter()
    .any(|kw| head.starts_with(kw))
}
340
/// True when the line is nothing but a closing delimiter sequence
/// ("}", "};", "})", or "});") after trimming.
fn is_closing_brace(line: &str) -> bool {
    matches!(line.trim(), "}" | "};" | "})" | "});")
}
345
#[cfg(test)]
mod tests {
    use super::*;

    // Task descriptions should yield both path hints and keyword hints.
    #[test]
    fn parse_task_finds_files_and_keywords() {
        let task = "Fix the authentication bug in src/auth.rs and update tests";
        let (files, keywords) = parse_task_hints(task);
        let has_auth_file = files.iter().any(|f| f.contains("auth.rs"));
        assert!(has_auth_file);
        let has_auth_keyword = keywords
            .iter()
            .any(|k| k.to_lowercase().contains("authentication"));
        assert!(has_auth_keyword);
    }

    // Score thresholds map onto the four rendering modes.
    #[test]
    fn recommend_mode_by_score() {
        let cases = [
            (1.0, "full"),
            (0.6, "signatures"),
            (0.3, "map"),
            (0.1, "reference"),
        ];
        for (score, mode) in cases {
            assert_eq!(recommend_mode(score), mode);
        }
    }

    // Definition lines must survive even a fairly tight budget.
    #[test]
    fn info_bottleneck_preserves_definitions() {
        let content = "fn main() {\n let x = 42;\n // boring comment\n println!(x);\n}\n";
        let result = information_bottleneck_filter(content, &["main".to_string()], 0.6);
        assert!(result.contains("fn main"));
    }

    // Repetitive content should be compressed harder than diverse content.
    #[test]
    fn adaptive_budget_reduces_for_repetitive() {
        let repetitive = "let x = 1;\n".repeat(50);
        let diverse: String = (0..50)
            .map(|i| format!("let var_{i} = func_{i}(arg_{i});"))
            .collect::<Vec<_>>()
            .join("\n");
        assert!(
            super::adaptive_ib_budget(&repetitive, 0.7) < super::adaptive_ib_budget(&diverse, 0.7),
            "repetitive content should get lower budget"
        );
    }
}