1use std::collections::HashMap;
2use std::path::Path;
3use std::sync::{Arc, Mutex, OnceLock};
4use std::time::{SystemTime, UNIX_EPOCH};
5
6use serde::Serialize;
7
8use crate::core::bm25_index::BM25Index;
9use crate::core::graph_index::{self, ProjectIndex};
10
/// Lifecycle of one background-built index component.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
enum State {
    /// No build has been started yet; the state of a freshly created component.
    #[default]
    Idle,
    /// A build is in progress (set by `start_component`).
    Building,
    /// The most recent build completed (set by `finish_ok`).
    Ready,
    /// The most recent build failed (set by `finish_err`).
    Failed,
}

/// Progress bookkeeping for a single index component of a project.
///
/// All timestamps are wall-clock milliseconds since the Unix epoch.
#[derive(Debug, Clone, Default)]
struct Component {
    /// Current lifecycle state.
    state: State,
    /// When the most recent build started, if one ever did.
    started_ms: Option<u64>,
    /// When the most recent build finished (successfully or not).
    finished_ms: Option<u64>,
    /// Elapsed time between `started_ms` and `finished_ms` of the most recent build.
    duration_ms: Option<u64>,
    /// Error message recorded when the most recent build failed.
    last_error: Option<String>,
    /// Optional human-readable remark attached after a build.
    note: Option<String>,
}

impl Component {
    /// Creates an idle component with no timestamps, error, or note.
    ///
    /// Equivalent to [`Component::default`]; kept as the named constructor used by callers.
    fn new() -> Self {
        Self::default()
    }
}
44
/// Build bookkeeping for one project root, stored in the global registry.
#[derive(Debug)]
struct ProjectBuild {
    /// `true` while a worker thread spawned by `ensure_all_background` is active for
    /// this project; used to make sure only one worker runs at a time.
    worker_running: bool,
    /// Set the first time `ensure_warm_for_tool` triggers a heavy warm-up for this
    /// project; nothing in this file resets it.
    warm_triggered: bool,
    /// Progress of the graph index build.
    graph: Component,
    /// Progress of the BM25 index build.
    bm25: Component,
}
55
56impl ProjectBuild {
57 fn new() -> Self {
58 Self {
59 worker_running: false,
60 warm_triggered: false,
61 graph: Component::new(),
62 bm25: Component::new(),
63 }
64 }
65}
66
67static REGISTRY: OnceLock<Mutex<HashMap<String, Arc<Mutex<ProjectBuild>>>>> = OnceLock::new();
75
76fn registry() -> &'static Mutex<HashMap<String, Arc<Mutex<ProjectBuild>>>> {
77 REGISTRY.get_or_init(|| Mutex::new(HashMap::new()))
78}
79
80fn entry_for(project_root: &str) -> Arc<Mutex<ProjectBuild>> {
81 let mut map = registry()
82 .lock()
83 .unwrap_or_else(std::sync::PoisonError::into_inner);
84 map.entry(project_root.to_string())
85 .or_insert_with(|| Arc::new(Mutex::new(ProjectBuild::new())))
86 .clone()
87}
88
/// Returns the current wall-clock time in milliseconds since the Unix epoch.
///
/// A system clock set before the epoch yields `0`. A millisecond count that does not
/// fit in `u64` saturates to `u64::MAX` instead of being silently truncated.
fn now_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    // `as_millis` returns `u128`; convert with a check rather than a lossy `as` cast.
    u64::try_from(since_epoch.as_millis()).unwrap_or(u64::MAX)
}
95
96fn start_component(c: &mut Component) {
97 c.state = State::Building;
98 c.started_ms = Some(now_ms());
99 c.finished_ms = None;
100 c.duration_ms = None;
101 c.last_error = None;
102 c.note = None;
103}
104
105fn finish_ok(c: &mut Component) {
106 c.state = State::Ready;
107 let end = now_ms();
108 c.finished_ms = Some(end);
109 c.duration_ms = c.started_ms.map(|s| end.saturating_sub(s));
110}
111
112fn finish_err(c: &mut Component, e: String) {
113 c.state = State::Failed;
114 let end = now_ms();
115 c.finished_ms = Some(end);
116 c.duration_ms = c.started_ms.map(|s| end.saturating_sub(s));
117 c.last_error = Some(e);
118}
119
/// How much background index warming a tool call asks for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WarmNeed {
    /// The tool does not trigger any background warming.
    None,
    /// The tool only triggers the search-index warm-up.
    Search,
    /// The tool triggers the full background build.
    Heavy,
}

/// Classifies a tool name by the warming it needs.
///
/// `"ctx_search"` maps to [`WarmNeed::Search`], the tools listed in `HEAVY_TOOLS` map to
/// [`WarmNeed::Heavy`], and every other name (including unknown ones) maps to
/// [`WarmNeed::None`]. Matching is exact and case-sensitive.
#[must_use]
pub fn warm_need_for_tool(tool: &str) -> WarmNeed {
    const HEAVY_TOOLS: [&str; 10] = [
        "ctx_graph",
        "ctx_callgraph",
        "ctx_routes",
        "ctx_repomap",
        "ctx_impact",
        "ctx_artifacts",
        "ctx_semantic_search",
        "ctx_provider",
        "ctx_compose",
        "ctx_review",
    ];

    if tool == "ctx_search" {
        WarmNeed::Search
    } else if HEAVY_TOOLS.contains(&tool) {
        WarmNeed::Heavy
    } else {
        WarmNeed::None
    }
}
155
156pub fn ensure_warm_for_tool(project_root: &str, tool: &str) -> bool {
163 if project_root.is_empty() {
164 return false;
165 }
166 match warm_need_for_tool(tool) {
167 WarmNeed::None => false,
168 WarmNeed::Search => {
169 crate::core::search_index::ensure_background(project_root, true, false);
172 false
173 }
174 WarmNeed::Heavy => {
175 let entry = entry_for(project_root);
176 let first_warm = {
177 let mut s = entry
178 .lock()
179 .unwrap_or_else(std::sync::PoisonError::into_inner);
180 if s.warm_triggered {
181 false
182 } else {
183 s.warm_triggered = true;
184 true
185 }
186 };
187 if first_warm {
188 ensure_all_background(project_root);
189 }
190 first_warm
191 }
192 }
193}
194
195pub fn ensure_all_background(project_root: &str) {
196 let state = entry_for(project_root);
197 let should_spawn = {
198 let mut s = state
199 .lock()
200 .unwrap_or_else(std::sync::PoisonError::into_inner);
201 if s.worker_running {
202 false
203 } else {
204 s.worker_running = true;
205 true
206 }
207 };
208
209 if !should_spawn {
210 return;
211 }
212
213 let root = project_root.to_string();
214 std::thread::spawn(move || {
215 let state = entry_for(&root);
216
217 crate::core::search_index::ensure_background(&root, true, false);
220
221 {
223 let mut s = state
224 .lock()
225 .unwrap_or_else(std::sync::PoisonError::into_inner);
226 start_component(&mut s.graph);
227 }
228 let graph_result = std::panic::catch_unwind(|| {
229 let (idx, content_cache) = graph_index::scan_with_content_cache(&root);
230 let _ = idx.save();
234 (idx, content_cache)
235 });
236 let content_cache = if let Ok((_idx, cache)) = graph_result {
237 let mut s = state
238 .lock()
239 .unwrap_or_else(std::sync::PoisonError::into_inner);
240 finish_ok(&mut s.graph);
241 cache
242 } else {
243 let mut s = state
244 .lock()
245 .unwrap_or_else(std::sync::PoisonError::into_inner);
246 finish_err(&mut s.graph, "graph index build panicked".to_string());
247 HashMap::new()
248 };
249
250 {
252 let mut s = state
253 .lock()
254 .unwrap_or_else(std::sync::PoisonError::into_inner);
255 start_component(&mut s.bm25);
256 }
257 let bm = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
258 let root_pb = Path::new(&root);
259 let idx = if content_cache.is_empty() {
260 BM25Index::load_or_build(root_pb)
261 } else {
262 BM25Index::build_with_content_hint(root_pb, &content_cache)
263 };
264 let outcome = idx.save(root_pb);
265 (idx.doc_count, outcome)
266 }));
267 if let Ok((doc_count, save_res)) = bm {
268 let mut s = state
269 .lock()
270 .unwrap_or_else(std::sync::PoisonError::into_inner);
271 finish_ok(&mut s.bm25);
272 s.bm25.note = Some(bm25_build_note(doc_count, &save_res));
273 } else {
274 let mut s = state
275 .lock()
276 .unwrap_or_else(std::sync::PoisonError::into_inner);
277 finish_err(&mut s.bm25, "bm25 build panicked".to_string());
278 }
279
280 let mut s = state
281 .lock()
282 .unwrap_or_else(std::sync::PoisonError::into_inner);
283 s.worker_running = false;
284 });
285}
286
287const MAX_EXTRA_ROOT_BUILDS: usize = 8;
291
292pub fn ensure_extra_roots_background(primary_root: &str, extra_roots: &[String]) {
293 let primary = Path::new(primary_root);
294 let mut built = 0;
295 for root in extra_roots {
296 if built >= MAX_EXTRA_ROOT_BUILDS {
297 break;
298 }
299 let rp = Path::new(root);
300 if !rp.is_dir() {
301 continue;
302 }
303 if rp.starts_with(primary) {
305 continue;
306 }
307 if primary.starts_with(rp) {
309 continue;
310 }
311 ensure_all_background(root);
312 built += 1;
313 }
314}
315
316fn bm25_build_note(
321 doc_count: usize,
322 save: &std::io::Result<crate::core::bm25_index::SaveOutcome>,
323) -> String {
324 use crate::core::bm25_index::SaveOutcome;
325 match save {
326 Ok(SaveOutcome::Persisted { compressed_bytes }) => format!(
327 "indexed {doc_count} chunks, {:.1} MB persisted",
328 *compressed_bytes as f64 / 1_048_576.0
329 ),
330 Ok(SaveOutcome::SkippedTooLarge {
331 compressed_bytes,
332 limit_bytes,
333 }) => format!(
334 "indexed {doc_count} chunks but NOT persisted to disk: compressed {:.1} MB exceeds the {:.0} MB cap. \
335 Raise it via LEAN_CTX_BM25_MAX_CACHE_MB (or bm25_max_cache_mb in config) or add extra_ignore_patterns, \
336 then run `lean-ctx reindex`. Until then the index is rebuilt from scratch on every cold start.",
337 *compressed_bytes as f64 / 1_048_576.0,
338 *limit_bytes as f64 / 1_048_576.0
339 ),
340 Err(e) => format!("indexed {doc_count} chunks but persisting failed: {e}"),
341 }
342}
343
/// Snapshot of the BM25 build progress for one project, as returned by `bm25_summary`.
#[derive(Debug, Clone)]
pub struct Bm25Summary {
    /// One of `"idle"`, `"building"`, `"ready"`, or `"failed"`.
    pub state: &'static str,
    /// While building: time elapsed since the build started. Otherwise: the duration
    /// of the most recent finished build, if any.
    pub elapsed_ms: Option<u64>,
    /// Note attached to the most recent build, if any.
    pub note: Option<String>,
    /// Error message of the most recent failed build, if any.
    pub last_error: Option<String>,
}
354
355pub fn bm25_summary(project_root: &str) -> Bm25Summary {
356 let entry = entry_for(project_root);
357 let s = entry
358 .lock()
359 .unwrap_or_else(std::sync::PoisonError::into_inner);
360 let c = &s.bm25;
361 let elapsed_ms = if matches!(c.state, State::Building) {
362 c.started_ms.map(|start| now_ms().saturating_sub(start))
363 } else {
364 c.duration_ms
365 };
366 Bm25Summary {
367 state: match c.state {
368 State::Idle => "idle",
369 State::Building => "building",
370 State::Ready => "ready",
371 State::Failed => "failed",
372 },
373 elapsed_ms,
374 note: c.note.clone(),
375 last_error: c.last_error.clone(),
376 }
377}
378
379pub fn try_load_graph_index(project_root: &str) -> Option<ProjectIndex> {
380 crate::core::graph_cache::get_cached(project_root).map(|arc| (*arc).clone())
383}
384
385pub fn try_load_bm25_index(project_root: &str) -> Option<BM25Index> {
386 BM25Index::load(Path::new(project_root))
387}
388
389pub fn is_building() -> bool {
391 let map = registry()
392 .lock()
393 .unwrap_or_else(std::sync::PoisonError::into_inner);
394 map.values().any(|entry| {
395 let s = entry
396 .lock()
397 .unwrap_or_else(std::sync::PoisonError::into_inner);
398 matches!(s.bm25.state, State::Building) || matches!(s.graph.state, State::Building)
399 })
400}
401
/// Serializable, borrowed view of one `Component`, produced by `component_status`.
#[derive(Debug, Serialize)]
struct ComponentStatus<'a> {
    /// One of `"idle"`, `"building"`, `"ready"`, or `"failed"`.
    state: &'a str,
    /// Start of the most recent build, in milliseconds since the Unix epoch.
    started_ms: Option<u64>,
    /// End of the most recent build, in milliseconds since the Unix epoch.
    finished_ms: Option<u64>,
    /// Duration of the most recent finished build, in milliseconds.
    duration_ms: Option<u64>,
    /// Error message of the most recent failed build.
    last_error: Option<&'a str>,
    /// Optional remark about the most recent build; omitted from the output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    note: Option<&'a str>,
}
412
413fn component_status(c: &Component) -> ComponentStatus<'_> {
414 ComponentStatus {
415 state: match c.state {
416 State::Idle => "idle",
417 State::Building => "building",
418 State::Ready => "ready",
419 State::Failed => "failed",
420 },
421 started_ms: c.started_ms,
422 finished_ms: c.finished_ms,
423 duration_ms: c.duration_ms,
424 last_error: c.last_error.as_deref(),
425 note: c.note.as_deref(),
426 }
427}
428
/// Payload serialized by `status_json`: in-memory build progress plus on-disk state.
#[derive(Debug, Serialize)]
struct StatusResponse<'a> {
    /// The project root the status was requested for, as passed by the caller.
    project_root: &'a str,
    /// Build progress of the graph index.
    graph_index: ComponentStatus<'a>,
    /// Build progress of the BM25 index.
    bm25_index: ComponentStatus<'a>,
    /// What is currently persisted on disk for this project.
    disk: DiskStatusAll,
}
436
/// On-disk state of one persisted artifact. The default value means "not present".
#[derive(Debug, Serialize, Default)]
pub struct DiskStatus {
    /// Whether the artifact's file was found.
    pub exists: bool,
    /// Size of the artifact's file in bytes, when its metadata could be read.
    pub size_bytes: Option<u64>,
    /// Number of entries in the artifact, when known: indexed files for the graph
    /// index, rows of the `nodes` table for the code graph; not reported for BM25.
    pub file_count: Option<u64>,
    /// Last-modified time of the file, as formatted by `format_time`.
    pub modified_at: Option<String>,
}
444
/// On-disk state of every artifact tracked for a project, as returned by `disk_status`.
#[derive(Debug, Serialize, Default)]
pub struct DiskStatusAll {
    /// The persisted graph index (`index.json.zst` or `index.json`).
    pub graph_index: DiskStatus,
    /// The persisted BM25 index file.
    pub bm25_index: DiskStatus,
    /// The code graph database (`graph.db`).
    pub code_graph: DiskStatus,
}
451
452fn disk_status_for_graph(project_root: &str) -> DiskStatus {
453 let Some(dir) = graph_index::ProjectIndex::index_dir(project_root) else {
454 return DiskStatus::default();
455 };
456 let zst = dir.join("index.json.zst");
457 let json = dir.join("index.json");
458 let path = if zst.exists() {
459 zst
460 } else if json.exists() {
461 json
462 } else {
463 return DiskStatus::default();
464 };
465 let meta = std::fs::metadata(&path).ok();
466 let file_count =
467 graph_index::ProjectIndex::load(project_root).map(|idx| idx.files.len() as u64);
468 DiskStatus {
469 exists: true,
470 size_bytes: meta.as_ref().map(std::fs::Metadata::len),
471 file_count,
472 modified_at: meta.and_then(|m| m.modified().ok()).map(format_time),
473 }
474}
475
476fn disk_status_for_bm25(project_root: &str) -> DiskStatus {
477 let root = Path::new(project_root);
478 let path = BM25Index::index_file_path(root);
479 if !path.exists() {
480 return DiskStatus::default();
481 }
482 let meta = std::fs::metadata(&path).ok();
483 DiskStatus {
484 exists: true,
485 size_bytes: meta.as_ref().map(std::fs::Metadata::len),
486 file_count: None,
487 modified_at: meta.and_then(|m| m.modified().ok()).map(format_time),
488 }
489}
490
491fn disk_status_for_code_graph(project_root: &str) -> DiskStatus {
492 let dir = crate::core::property_graph::graph_dir(project_root);
493 let db_path = dir.join("graph.db");
494 if !db_path.exists() {
495 return DiskStatus::default();
496 }
497 let meta = std::fs::metadata(&db_path).ok();
498 let node_count = crate::core::property_graph::CodeGraph::open(project_root)
499 .ok()
500 .and_then(|g| {
501 g.connection()
502 .query_row("SELECT count(*) FROM nodes", [], |r| r.get::<_, i64>(0))
503 .ok()
504 .map(|c| c as u64)
505 });
506 DiskStatus {
507 exists: true,
508 size_bytes: meta.as_ref().map(std::fs::Metadata::len),
509 file_count: node_count,
510 modified_at: meta.and_then(|m| m.modified().ok()).map(format_time),
511 }
512}
513
514fn format_time(t: SystemTime) -> String {
515 let secs = t.duration_since(UNIX_EPOCH).unwrap_or_default().as_secs();
516 let dt = chrono::DateTime::from_timestamp(secs as i64, 0);
517 dt.map_or_else(
518 || format!("{secs}"),
519 |d| d.format("%Y-%m-%d %H:%M:%S UTC").to_string(),
520 )
521}
522
523pub fn disk_status(project_root: &str) -> DiskStatusAll {
524 DiskStatusAll {
525 graph_index: disk_status_for_graph(project_root),
526 bm25_index: disk_status_for_bm25(project_root),
527 code_graph: disk_status_for_code_graph(project_root),
528 }
529}
530
531pub fn status_json(project_root: &str) -> String {
532 let state = entry_for(project_root);
533 let s = state
534 .lock()
535 .unwrap_or_else(std::sync::PoisonError::into_inner);
536 let res = StatusResponse {
537 project_root,
538 graph_index: component_status(&s.graph),
539 bm25_index: component_status(&s.bm25),
540 disk: disk_status(project_root),
541 };
542 serde_json::to_string(&res).unwrap_or_else(|_| "{}".to_string())
543}
544
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `root` has an entry in the global build registry.
    ///
    /// `ensure_all_background` registers its root synchronously, before spawning the
    /// worker, so this tells whether a build was scheduled for `root`.
    fn is_registered(root: &str) -> bool {
        registry()
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .contains_key(root)
    }

    #[test]
    fn status_json_is_valid_json() {
        let s = status_json("/tmp");
        let _: serde_json::Value = serde_json::from_str(&s).unwrap();
    }

    #[test]
    fn warm_need_classifies_tools() {
        for light in [
            "ctx_read",
            "ctx_shell",
            "ctx_tree",
            "ctx_knowledge",
            "unknown_tool",
        ] {
            assert_eq!(warm_need_for_tool(light), WarmNeed::None, "{light}");
        }
        assert_eq!(warm_need_for_tool("ctx_search"), WarmNeed::Search);
        for heavy in [
            "ctx_graph",
            "ctx_callgraph",
            "ctx_routes",
            "ctx_repomap",
            "ctx_impact",
            "ctx_artifacts",
            "ctx_semantic_search",
            "ctx_provider",
            "ctx_compose",
            "ctx_review",
        ] {
            assert_eq!(warm_need_for_tool(heavy), WarmNeed::Heavy, "{heavy}");
        }
    }

    #[test]
    fn ensure_warm_lightweight_and_search_never_signal_first_warm() {
        // An empty root is rejected before the tool is even classified.
        assert!(!ensure_warm_for_tool("", "ctx_graph"));
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().to_string_lossy().to_string();
        assert!(!ensure_warm_for_tool(&root, "ctx_read"));
        assert!(!ensure_warm_for_tool(&root, "ctx_search"));
    }

    #[test]
    fn ensure_warm_heavy_is_once_per_root() {
        // A unique temp dir guarantees no other test has warmed this root.
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().to_string_lossy().to_string();
        assert!(
            ensure_warm_for_tool(&root, "ctx_callgraph"),
            "first heavy warm must signal true"
        );
        assert!(
            !ensure_warm_for_tool(&root, "ctx_callgraph"),
            "second heavy warm must be deduped to false"
        );
        assert!(
            !ensure_warm_for_tool(&root, "ctx_semantic_search"),
            "any later heavy tool on the same root is also deduped"
        );
    }

    #[test]
    fn build_note_persisted_reports_size() {
        let note = bm25_build_note(
            42,
            &Ok(crate::core::bm25_index::SaveOutcome::Persisted {
                compressed_bytes: 3 * 1024 * 1024,
            }),
        );
        assert!(
            note.contains("42 chunks"),
            "note should report chunk count: {note}"
        );
        assert!(
            note.contains("persisted"),
            "note should report persistence: {note}"
        );
    }

    #[test]
    fn build_note_too_large_carries_remedy() {
        let note = bm25_build_note(
            1000,
            &Ok(crate::core::bm25_index::SaveOutcome::SkippedTooLarge {
                compressed_bytes: 600 * 1024 * 1024,
                limit_bytes: 512 * 1024 * 1024,
            }),
        );
        assert!(
            note.contains("NOT persisted"),
            "must flag non-persistence: {note}"
        );
        assert!(
            note.contains("LEAN_CTX_BM25_MAX_CACHE_MB") && note.contains("reindex"),
            "too-large note must carry an actionable remedy: {note}"
        );
    }

    #[test]
    fn build_note_persist_error_is_reported() {
        let note = bm25_build_note(7, &Err(std::io::Error::other("disk full")));
        assert!(note.contains("persisting failed"), "note: {note}");
        assert!(
            note.contains("disk full"),
            "note should include the io error: {note}"
        );
    }

    #[test]
    fn bm25_summary_unknown_project_is_idle() {
        let tmp = tempfile::tempdir().unwrap();
        let summary = bm25_summary(tmp.path().to_string_lossy().as_ref());
        assert_eq!(summary.state, "idle");
        assert!(summary.note.is_none());
        assert!(summary.last_error.is_none());
    }

    #[test]
    fn extra_roots_skips_subdirs_of_primary() {
        let tmp = tempfile::tempdir().unwrap();
        let primary = tmp.path().join("primary");
        std::fs::create_dir_all(&primary).unwrap();
        let sub = primary.join("subdir");
        std::fs::create_dir_all(&sub).unwrap();
        let external = tmp.path().join("external");
        std::fs::create_dir_all(&external).unwrap();

        let primary_str = primary.to_string_lossy().to_string();
        let sub_str = sub.to_string_lossy().to_string();
        let external_str = external.to_string_lossy().to_string();
        let extra = vec![sub_str.clone(), external_str.clone()];

        ensure_extra_roots_background(&primary_str, &extra);

        assert!(
            !is_registered(&sub_str),
            "a subdirectory of the primary root must not get its own build"
        );
        assert!(
            is_registered(&external_str),
            "an independent extra root must be scheduled"
        );
    }

    #[test]
    fn extra_roots_caps_at_max() {
        let tmp = tempfile::tempdir().unwrap();
        let primary = tmp.path().join("primary");
        std::fs::create_dir_all(&primary).unwrap();

        let mut extra = Vec::new();
        for i in 0..20 {
            let d = tmp.path().join(format!("ext-{i}"));
            std::fs::create_dir_all(&d).unwrap();
            extra.push(d.to_string_lossy().to_string());
        }

        let primary_str = primary.to_string_lossy().to_string();
        ensure_extra_roots_background(&primary_str, &extra);

        // Only the first MAX_EXTRA_ROOT_BUILDS eligible roots may be scheduled.
        for (i, root) in extra.iter().enumerate() {
            assert_eq!(
                is_registered(root.as_str()),
                i < MAX_EXTRA_ROOT_BUILDS,
                "unexpected scheduling state for extra root #{i}"
            );
        }
    }
}