1use std::collections::HashMap;
15use std::sync::mpsc;
16use std::time::Duration;
17
18use crate::core::graph_provider;
19use crate::core::tokens::count_tokens;
20use crate::tools::CrpMode;
21
22const DEFAULT_SEMANTIC_BUDGET_MS: u64 = 2500;
28
29fn semantic_budget() -> Duration {
30 let ms = std::env::var("LEAN_CTX_COMPOSE_BUDGET_MS")
31 .ok()
32 .and_then(|v| v.parse::<u64>().ok())
33 .filter(|&v| v > 0)
34 .unwrap_or(DEFAULT_SEMANTIC_BUDGET_MS);
35 Duration::from_millis(ms)
36}
37
38const DEFAULT_SYMBOL_BUDGET_TOKENS: usize = 600;
42
43fn symbol_budget_tokens() -> usize {
44 std::env::var("LEAN_CTX_COMPOSE_SYMBOL_TOKENS")
45 .ok()
46 .and_then(|v| v.parse::<usize>().ok())
47 .filter(|&v| v > 0)
48 .unwrap_or(DEFAULT_SYMBOL_BUDGET_TOKENS)
49}
50
51const DEFAULT_GRAPH_BUDGET_MS: u64 = 1500;
56
57fn graph_budget() -> Duration {
58 let ms = std::env::var("LEAN_CTX_COMPOSE_GRAPH_BUDGET_MS")
59 .ok()
60 .and_then(|v| v.parse::<u64>().ok())
61 .filter(|&v| v > 0)
62 .unwrap_or(DEFAULT_GRAPH_BUDGET_MS);
63 Duration::from_millis(ms)
64}
65
66const SPREAD_DECAY: f64 = 0.6;
70const SPREAD_HOPS: usize = 3;
71const SPREAD_TOP_K: usize = 8;
73
74fn build_associative_block(project_root: &str, keywords: &[String]) -> String {
80 let Some(open) = graph_provider::open_or_build(project_root) else {
81 return String::new();
82 };
83 let gp = &open.provider;
84
85 let mut seed_files: Vec<String> = Vec::new();
87 for kw in keywords {
88 for sym in gp.find_symbols(kw, None, None) {
89 if !seed_files.contains(&sym.file) {
90 seed_files.push(sym.file);
91 }
92 }
93 }
94 if seed_files.is_empty() {
95 return String::new();
96 }
97
98 crate::core::cooccurrence::record_access(project_root, &seed_files);
101
102 let mut adjacency: HashMap<String, Vec<(String, f64)>> = HashMap::new();
105 let mut add_edge = |a: &str, b: &str, w: f64| {
106 adjacency
107 .entry(a.to_string())
108 .or_default()
109 .push((b.to_string(), w));
110 adjacency
111 .entry(b.to_string())
112 .or_default()
113 .push((a.to_string(), w));
114 };
115 for e in gp.edges() {
116 add_edge(&e.from, &e.to, if e.weight > 0.0 { e.weight } else { 1.0 });
117 }
118 let coaccess = crate::core::cooccurrence::load(project_root);
119 for sf in &seed_files {
120 for (nbr, w) in coaccess.related(sf, 16) {
121 add_edge(sf, &nbr, w);
122 }
123 }
124
125 let seeds: HashMap<String, f64> = seed_files.iter().map(|f| (f.clone(), 1.0)).collect();
126 let ranked = crate::core::spreading_activation::related_ranked(
127 &seeds,
128 &adjacency,
129 SPREAD_DECAY,
130 SPREAD_HOPS,
131 SPREAD_TOP_K,
132 );
133 if ranked.is_empty() {
134 return String::new();
135 }
136
137 let mut s = String::from("\n## Related (associative: import/call graph + learned co-access)\n");
138 for (file, activation) in ranked {
139 s.push_str(&format!("- {file} (activation {activation:.2})\n"));
140 }
141 s
142}
143
144fn associative_block_budgeted(project_root: &str, keywords: &[String]) -> String {
148 if keywords.is_empty() {
149 return String::new();
150 }
151 let (tx, rx) = mpsc::channel::<String>();
152 let root = project_root.to_string();
153 let kws = keywords.to_vec();
154 std::thread::spawn(move || {
155 let block = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
156 build_associative_block(&root, &kws)
157 }))
158 .unwrap_or_else(|_| {
159 tracing::warn!("[ctx_compose: associative block panicked; omitting section]");
160 String::new()
161 });
162 let _ = tx.send(block);
163 });
164 rx.recv_timeout(graph_budget()).unwrap_or_default()
165}
166
167const STOPWORDS: &[&str] = &[
169 "the",
170 "and",
171 "for",
172 "with",
173 "that",
174 "this",
175 "from",
176 "into",
177 "how",
178 "where",
179 "what",
180 "does",
181 "are",
182 "was",
183 "use",
184 "used",
185 "uses",
186 "add",
187 "all",
188 "any",
189 "can",
190 "get",
191 "set",
192 "via",
193 "out",
194 "its",
195 "his",
196 "her",
197 "you",
198 "your",
199 "our",
200 "find",
201 "show",
202 "list",
203 "make",
204 "when",
205 "then",
206 "has",
207 "have",
208 "had",
209 "not",
210 "but",
211 "see",
212 "function",
213 "method",
214 "class",
215 "code",
216 "file",
217 "files",
218 "implement",
219 "implementation",
220];
221
222fn extract_keywords(task: &str, max: usize) -> Vec<String> {
225 let mut seen = std::collections::HashSet::new();
226 let mut out = Vec::new();
227 for raw in task.split(|c: char| !(c.is_alphanumeric() || c == '_')) {
228 if raw.len() < 3 {
229 continue;
230 }
231 if STOPWORDS.contains(&raw.to_ascii_lowercase().as_str()) {
232 continue;
233 }
234 if seen.insert(raw.to_string()) {
235 out.push(raw.to_string());
236 if out.len() >= max {
237 break;
238 }
239 }
240 }
241 out
242}
243
244fn ranked_files_budgeted(task: &str, project_root: &str, crp_mode: CrpMode) -> String {
248 let shared_cache = crate::tools::ctx_semantic_search::get_thread_cache();
249 let (tx, rx) = mpsc::channel::<String>();
250 let task_owned = task.to_string();
251 let root_owned = project_root.to_string();
252
253 std::thread::spawn(move || {
254 if let Some(cache) = shared_cache {
255 crate::tools::ctx_semantic_search::set_thread_cache(cache);
256 }
257 let ranked = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
258 crate::tools::ctx_semantic_search::handle(
259 &task_owned,
260 &root_owned,
261 8,
262 crp_mode,
263 None,
264 None,
265 None,
266 Some(false),
267 Some(false),
268 )
269 }))
270 .unwrap_or_else(|_| {
271 tracing::warn!("[ctx_compose: semantic ranking panicked; omitting section]");
272 String::new()
273 });
274 let _ = tx.send(ranked);
277 });
278
279 match rx.recv_timeout(semantic_budget()) {
280 Ok(ranked) => ranked.trim().to_string(),
281 Err(_) => "(deferred — semantic index is warming; the exact matches below are \
282 authoritative for this call, and ranking will be instant on the next call)"
283 .to_string(),
284 }
285}
286
287pub fn handle(task: &str, project_root: &str, crp_mode: CrpMode) -> (String, usize) {
289 let task = task.trim();
290 if task.is_empty() {
291 return ("ERROR: task is required".to_string(), 0);
292 }
293
294 let keywords = extract_keywords(task, 6);
295 let allow_secret = crate::core::roles::active_role().io.allow_secret_paths;
296
297 let mut out = String::new();
298 out.push_str(&format!("TASK: {task}\n"));
299 if keywords.is_empty() {
300 out.push_str("KEYWORDS: (none extracted — using full task for ranking)\n");
301 } else {
302 out.push_str(&format!("KEYWORDS: {}\n", keywords.join(", ")));
303 }
304
305 out.push_str("\n## Ranked files (semantic)\n");
310 out.push_str(&ranked_files_budgeted(task, project_root, crp_mode));
311 out.push('\n');
312
313 if let Some(primary) = keywords.first() {
315 let (grep, _g) = crate::tools::ctx_search::handle(
316 primary,
317 project_root,
318 None,
319 10,
320 crp_mode,
321 true,
322 allow_secret,
323 );
324 out.push_str(&format!("\n## Exact matches: '{primary}'\n"));
325 out.push_str(grep.trim());
326 out.push('\n');
327 }
328
329 use crate::core::context_packing::{greedy_max_coverage, CoverageItem};
335 let mut snippets: Vec<String> = Vec::new();
336 let mut items: Vec<CoverageItem> = Vec::new();
337 for kw in &keywords {
338 if let Some((rendered, toks)) =
339 crate::tools::ctx_symbol::best_symbol_snippet(kw, project_root)
340 {
341 let mut terms: std::collections::HashSet<String> =
344 std::collections::HashSet::from([kw.clone()]);
345 for other in &keywords {
346 if other != kw && rendered.contains(other.as_str()) {
347 terms.insert(other.clone());
348 }
349 }
350 items.push(CoverageItem {
351 terms,
352 cost: toks.max(1),
353 });
354 snippets.push(rendered);
355 }
356 }
357 if !items.is_empty() {
358 let chosen = greedy_max_coverage(&items, symbol_budget_tokens(), |_| 1.0);
359 let mut seen = std::collections::HashSet::new();
360 let mut header_written = false;
361 for idx in chosen {
362 let rendered = snippets[idx].trim();
363 if rendered.is_empty() || !seen.insert(rendered.to_string()) {
364 continue;
365 }
366 if !header_written {
367 out.push_str("\n## Top symbols (bodies)\n");
368 header_written = true;
369 }
370 out.push_str(rendered);
371 out.push('\n');
372 }
373 }
374
375 out.push_str(&associative_block_budgeted(project_root, &keywords));
379
380 let sent = count_tokens(&out);
381 (out, sent)
382}
383
384#[cfg(test)]
385mod tests {
386 use super::*;
387
388 #[test]
389 fn extract_keywords_drops_stopwords_and_short_tokens() {
390 let kw = extract_keywords("How does the BM25Index cache work for ctx_search?", 6);
391 assert!(kw.contains(&"BM25Index".to_string()));
392 assert!(kw.contains(&"cache".to_string()));
393 assert!(kw.contains(&"ctx_search".to_string()));
394 assert!(!kw.iter().any(|k| k == "the" || k == "How" || k == "for"));
395 }
396
397 #[test]
398 fn extract_keywords_dedups_and_caps() {
399 let kw = extract_keywords("alpha alpha beta gamma delta epsilon zeta eta", 3);
400 assert_eq!(kw.len(), 3);
401 assert_eq!(kw[0], "alpha");
402 }
403
404 #[test]
405 fn empty_task_is_rejected() {
406 let (out, tok) = handle(" ", "/tmp", CrpMode::Off);
407 assert!(out.starts_with("ERROR"));
408 assert_eq!(tok, 0);
409 }
410}