1use std::collections::HashMap;
15use std::sync::mpsc;
16use std::time::Duration;
17
18use crate::core::graph_provider;
19use crate::core::tokens::count_tokens;
20use crate::tools::CrpMode;
21
/// Fallback wait time, in milliseconds, used by [`semantic_budget`] when
/// `LEAN_CTX_COMPOSE_BUDGET_MS` does not supply a usable value.
const DEFAULT_SEMANTIC_BUDGET_MS: u64 = 2500;

/// Returns the time budget used as the receive timeout for semantic ranking.
///
/// The `LEAN_CTX_COMPOSE_BUDGET_MS` environment variable is consulted on every
/// call. A value that is missing, not valid Unicode, not parseable as `u64`,
/// or equal to zero is ignored and [`DEFAULT_SEMANTIC_BUDGET_MS`] is used.
fn semantic_budget() -> Duration {
    let configured = match std::env::var("LEAN_CTX_COMPOSE_BUDGET_MS") {
        Ok(raw) => raw.parse::<u64>().ok(),
        Err(_) => None,
    };
    let millis = match configured {
        // Zero would make the wait time out immediately, so treat it as unset.
        Some(ms) if ms > 0 => ms,
        _ => DEFAULT_SEMANTIC_BUDGET_MS,
    };
    Duration::from_millis(millis)
}
37
/// Fallback token budget used by [`symbol_budget_tokens`] when
/// `LEAN_CTX_COMPOSE_SYMBOL_TOKENS` does not supply a usable value.
const DEFAULT_SYMBOL_BUDGET_TOKENS: usize = 600;

/// Returns the token budget handed to the symbol-snippet selection step.
///
/// The `LEAN_CTX_COMPOSE_SYMBOL_TOKENS` environment variable is consulted on
/// every call. A value that is missing, not valid Unicode, not parseable as
/// `usize`, or equal to zero is ignored and
/// [`DEFAULT_SYMBOL_BUDGET_TOKENS`] is returned.
fn symbol_budget_tokens() -> usize {
    let configured = match std::env::var("LEAN_CTX_COMPOSE_SYMBOL_TOKENS") {
        Ok(raw) => raw.parse::<usize>().ok(),
        Err(_) => None,
    };
    match configured {
        // A zero budget is treated the same as an unset one.
        Some(tokens) if tokens > 0 => tokens,
        _ => DEFAULT_SYMBOL_BUDGET_TOKENS,
    }
}
50
/// Fallback wait time, in milliseconds, used by [`graph_budget`] when
/// `LEAN_CTX_COMPOSE_GRAPH_BUDGET_MS` does not supply a usable value.
const DEFAULT_GRAPH_BUDGET_MS: u64 = 1500;

/// Returns the time budget used as the receive timeout for the associative
/// (graph-based) section.
///
/// The `LEAN_CTX_COMPOSE_GRAPH_BUDGET_MS` environment variable is consulted on
/// every call. A value that is missing, not valid Unicode, not parseable as
/// `u64`, or equal to zero is ignored and [`DEFAULT_GRAPH_BUDGET_MS`] is used.
fn graph_budget() -> Duration {
    let configured = match std::env::var("LEAN_CTX_COMPOSE_GRAPH_BUDGET_MS") {
        Ok(raw) => raw.parse::<u64>().ok(),
        Err(_) => None,
    };
    let millis = match configured {
        // Zero would make the wait time out immediately, so treat it as unset.
        Some(ms) if ms > 0 => ms,
        _ => DEFAULT_GRAPH_BUDGET_MS,
    };
    Duration::from_millis(millis)
}
65
66const SPREAD_DECAY: f64 = 0.6;
70const SPREAD_HOPS: usize = 3;
71const SPREAD_TOP_K: usize = 8;
73
74fn build_associative_block(project_root: &str, keywords: &[String]) -> String {
80 let Some(open) = graph_provider::open_or_build(project_root) else {
81 return String::new();
82 };
83 let gp = &open.provider;
84
85 let mut seed_files: Vec<String> = Vec::new();
87 for kw in keywords {
88 for sym in gp.find_symbols(kw, None, None) {
89 if !seed_files.contains(&sym.file) {
90 seed_files.push(sym.file);
91 }
92 }
93 }
94 if seed_files.is_empty() {
95 return String::new();
96 }
97
98 crate::core::cooccurrence::record_access(project_root, &seed_files);
101
102 let mut adjacency: HashMap<String, Vec<(String, f64)>> = HashMap::new();
105 let mut add_edge = |a: &str, b: &str, w: f64| {
106 adjacency
107 .entry(a.to_string())
108 .or_default()
109 .push((b.to_string(), w));
110 adjacency
111 .entry(b.to_string())
112 .or_default()
113 .push((a.to_string(), w));
114 };
115 for e in gp.edges() {
116 add_edge(&e.from, &e.to, if e.weight > 0.0 { e.weight } else { 1.0 });
117 }
118 let coaccess = crate::core::cooccurrence::load(project_root);
119 for sf in &seed_files {
120 for (nbr, w) in coaccess.related(sf, 16) {
121 add_edge(sf, &nbr, w);
122 }
123 }
124
125 let seeds: HashMap<String, f64> = seed_files.iter().map(|f| (f.clone(), 1.0)).collect();
126 let ranked = crate::core::spreading_activation::related_ranked(
127 &seeds,
128 &adjacency,
129 SPREAD_DECAY,
130 SPREAD_HOPS,
131 SPREAD_TOP_K,
132 );
133 if ranked.is_empty() {
134 return String::new();
135 }
136
137 let mut s = String::from("\n## Related (associative: import/call graph + learned co-access)\n");
138 for (file, activation) in ranked {
139 let file = crate::core::protocol::display_path(&file);
142 s.push_str(&format!("- {file} (activation {activation:.2})\n"));
143 }
144 s
145}
146
147fn associative_block_budgeted(project_root: &str, keywords: &[String]) -> String {
151 if keywords.is_empty() {
152 return String::new();
153 }
154 let (tx, rx) = mpsc::channel::<String>();
155 let root = project_root.to_string();
156 let kws = keywords.to_vec();
157 std::thread::spawn(move || {
158 let block = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
159 build_associative_block(&root, &kws)
160 }))
161 .unwrap_or_else(|_| {
162 tracing::warn!("[ctx_compose: associative block panicked; omitting section]");
163 String::new()
164 });
165 let _ = tx.send(block);
166 });
167 rx.recv_timeout(graph_budget()).unwrap_or_default()
168}
169
/// Words that are never emitted as keywords by [`extract_keywords`].
///
/// Entries must stay lowercase ASCII: tokens are compared against this list
/// ignoring ASCII case only.
const STOPWORDS: &[&str] = &[
    "the",
    "and",
    "for",
    "with",
    "that",
    "this",
    "from",
    "into",
    "how",
    "where",
    "what",
    "does",
    "are",
    "was",
    "use",
    "used",
    "uses",
    "add",
    "all",
    "any",
    "can",
    "get",
    "set",
    "via",
    "out",
    "its",
    "his",
    "her",
    "you",
    "your",
    "our",
    "find",
    "show",
    "list",
    "make",
    "when",
    "then",
    "has",
    "have",
    "had",
    "not",
    "but",
    "see",
    "function",
    "method",
    "class",
    "code",
    "file",
    "files",
    "implement",
    "implementation",
];

/// Extracts at most `max` distinct keywords from a free-form task description.
///
/// The task is split on every character that is neither alphanumeric nor `_`.
/// A token is kept when it
/// * is at least 3 bytes long (byte length, not character count),
/// * is not a [`STOPWORDS`] entry (compared ignoring ASCII case), and
/// * has not been seen before (exact, case-sensitive comparison).
///
/// Keywords keep their original spelling and first-occurrence order. Scanning
/// stops as soon as `max` keywords were collected; `max == 0` yields an empty
/// vector.
fn extract_keywords(task: &str, max: usize) -> Vec<String> {
    // Borrowed slices are enough for de-duplication; only survivors are cloned.
    let mut seen: std::collections::HashSet<&str> = std::collections::HashSet::new();
    task.split(|c: char| !(c.is_alphanumeric() || c == '_'))
        .filter(|token| token.len() >= 3)
        .filter(|token| !STOPWORDS.iter().any(|stop| token.eq_ignore_ascii_case(stop)))
        .filter(|token| seen.insert(*token))
        // `take` enforces the cap before anything is emitted, so a cap of zero
        // really produces nothing.
        .take(max)
        .map(str::to_owned)
        .collect()
}
246
247fn ranked_files_budgeted(task: &str, project_root: &str, crp_mode: CrpMode) -> String {
251 let shared_cache = crate::tools::ctx_semantic_search::get_thread_cache();
252 let (tx, rx) = mpsc::channel::<String>();
253 let task_owned = task.to_string();
254 let root_owned = project_root.to_string();
255
256 std::thread::spawn(move || {
257 if let Some(cache) = shared_cache {
258 crate::tools::ctx_semantic_search::set_thread_cache(cache);
259 }
260 let ranked = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
261 crate::tools::ctx_semantic_search::handle(
262 &task_owned,
263 &root_owned,
264 8,
265 crp_mode,
266 None,
267 None,
268 None,
269 Some(false),
270 Some(false),
271 )
272 }))
273 .unwrap_or_else(|_| {
274 tracing::warn!("[ctx_compose: semantic ranking panicked; omitting section]");
275 String::new()
276 });
277 let _ = tx.send(ranked);
280 });
281
282 match rx.recv_timeout(semantic_budget()) {
283 Ok(ranked) => ranked.trim().to_string(),
284 Err(_) => deferred_ranking_note(project_root),
285 }
286}
287
288fn deferred_ranking_note(project_root: &str) -> String {
297 let exact = "the exact matches below are authoritative for this call";
298 let s = crate::core::index_orchestrator::bm25_summary(project_root);
299 match s.state {
300 "failed" => {
301 let why = s
302 .last_error
303 .or(s.note)
304 .unwrap_or_else(|| "unknown error".to_string());
305 format!(
306 "(semantic ranking unavailable — index build FAILED: {why}. {exact}. \
307 Inspect with `ctx_index status` / `lean-ctx doctor`, then `lean-ctx reindex`)"
308 )
309 }
310 "building" => {
311 let secs = s.elapsed_ms.map_or(0, |ms| ms / 1000);
312 format!(
313 "(deferred — semantic index is building ({secs}s elapsed); {exact}, \
314 and ranking becomes available once the build finishes)"
315 )
316 }
317 _ => match s.note {
321 Some(note) if note.contains("NOT persisted") => {
322 format!("(semantic ranking deferred — {note} {exact}.)")
323 }
324 _ => format!(
325 "(deferred — semantic index is warming; {exact}, \
326 and ranking will be fast on the next call once the index is cached)"
327 ),
328 },
329 }
330}
331
332pub fn handle(task: &str, project_root: &str, crp_mode: CrpMode) -> (String, usize) {
334 let task = task.trim();
335 if task.is_empty() {
336 return ("ERROR: task is required".to_string(), 0);
337 }
338
339 let keywords = extract_keywords(task, 6);
340 let allow_secret = crate::core::roles::active_role().io.allow_secret_paths;
341
342 let mut out = String::new();
343 out.push_str(&format!("TASK: {task}\n"));
344 if keywords.is_empty() {
345 out.push_str("KEYWORDS: (none extracted — using full task for ranking)\n");
346 } else {
347 out.push_str(&format!("KEYWORDS: {}\n", keywords.join(", ")));
348 }
349
350 out.push_str("\n## Ranked files (semantic)\n");
355 out.push_str(&ranked_files_budgeted(task, project_root, crp_mode));
356 out.push('\n');
357
358 if let Some(primary) = keywords.first() {
360 let (grep, _g) = crate::tools::ctx_search::handle(
361 primary,
362 project_root,
363 None,
364 10,
365 crp_mode,
366 true,
367 allow_secret,
368 );
369 out.push_str(&format!("\n## Exact matches: '{primary}'\n"));
370 out.push_str(grep.trim());
371 out.push('\n');
372 }
373
374 use crate::core::context_packing::{greedy_max_coverage, CoverageItem};
380 let mut snippets: Vec<String> = Vec::new();
381 let mut items: Vec<CoverageItem> = Vec::new();
382 for kw in &keywords {
383 if let Some((rendered, toks)) =
384 crate::tools::ctx_symbol::best_symbol_snippet(kw, project_root)
385 {
386 let mut terms: std::collections::HashSet<String> =
389 std::collections::HashSet::from([kw.clone()]);
390 for other in &keywords {
391 if other != kw && rendered.contains(other.as_str()) {
392 terms.insert(other.clone());
393 }
394 }
395 items.push(CoverageItem {
396 terms,
397 cost: toks.max(1),
398 });
399 snippets.push(rendered);
400 }
401 }
402 if !items.is_empty() {
403 let chosen = greedy_max_coverage(&items, symbol_budget_tokens(), |_| 1.0);
404 let mut seen = std::collections::HashSet::new();
405 let mut header_written = false;
406 for idx in chosen {
407 let rendered = snippets[idx].trim();
408 if rendered.is_empty() || !seen.insert(rendered.to_string()) {
409 continue;
410 }
411 if !header_written {
412 out.push_str("\n## Top symbols (bodies)\n");
413 header_written = true;
414 }
415 out.push_str(rendered);
416 out.push('\n');
417 }
418 }
419
420 out.push_str(&associative_block_budgeted(project_root, &keywords));
424
425 let sent = count_tokens(&out);
426 (out, sent)
427}
428
#[cfg(test)]
mod tests {
    use super::*;

    /// Identifiers survive extraction; stopwords (in any ASCII case) do not.
    #[test]
    fn extract_keywords_drops_stopwords_and_short_tokens() {
        let keywords = extract_keywords("How does the BM25Index cache work for ctx_search?", 6);
        let has = |word: &str| keywords.iter().any(|k| k == word);
        for expected in ["BM25Index", "cache", "ctx_search"] {
            assert!(has(expected));
        }
        for banned in ["the", "How", "for"] {
            assert!(!has(banned));
        }
    }

    /// Repeated tokens count once and the result never exceeds the cap.
    #[test]
    fn extract_keywords_dedups_and_caps() {
        let keywords = extract_keywords("alpha alpha beta gamma delta epsilon zeta eta", 3);
        assert_eq!(keywords.len(), 3);
        assert_eq!(keywords.first().map(String::as_str), Some("alpha"));
    }

    /// A whitespace-only task is rejected with an error and zero tokens.
    #[test]
    fn empty_task_is_rejected() {
        let (message, tokens) = handle(" ", "/tmp", CrpMode::Off);
        assert!(message.starts_with("ERROR"));
        assert_eq!(tokens, 0);
    }

    /// For a project directory with no index activity, the note must describe
    /// a warming or building index, point at the exact matches, and avoid the
    /// old "instant on the next call" wording.
    #[test]
    fn deferred_note_for_idle_index_is_optimistic_but_honest() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().to_string_lossy();
        let note = deferred_ranking_note(root.as_ref());

        let mentions_progress = ["warming", "building"]
            .iter()
            .any(|word| note.contains(*word));
        assert!(mentions_progress, "note: {note}");
        assert!(
            note.contains("authoritative"),
            "note must reassure that exact matches are authoritative: {note}"
        );
        assert!(
            !note.contains("instant on the next call"),
            "must not repeat the dishonest 'instant next call' promise: {note}"
        );
    }
}