Skip to main content

symforge/live_index/
store.rs

1use std::collections::{HashMap, HashSet};
2use std::ops::{Deref, DerefMut};
3use std::path::Path;
4use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
5use std::sync::{Arc, Mutex};
6
7use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
8use std::time::{Duration, Instant, SystemTime};
9
10use rayon::prelude::*;
11use tracing::{error, info, warn};
12
13use super::query::RepoOutlineView;
14use crate::domain::ParseDiagnostic;
15use crate::domain::index::{AdmissionTier, SkippedFile};
16use crate::domain::{
17    FileClassification, FileOutcome, FileProcessingResult, LanguageId, ReferenceRecord,
18    SymbolRecord, find_enclosing_symbol,
19};
20use crate::{discovery, parsing};
21
/// Per-file parse status stored in the index.
///
/// Each variant corresponds one-to-one to a `FileOutcome` variant; the mapping
/// is performed in `IndexedFile::from_parse_result`. The type is serde
/// (de)serializable.
#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ParseStatus {
    /// File parsed successfully with no syntax errors.
    Parsed,
    /// File parsed but tree-sitter reported syntax errors; symbols were still extracted.
    /// `warning` is the message copied from `FileOutcome::PartialParse`.
    PartialParse { warning: String },
    /// File could not be parsed at all; symbols list is empty but content bytes are stored.
    /// `error` is the message copied from `FileOutcome::Failed`.
    Failed { error: String },
}
32
/// A single indexed file — all data needed for query and display.
///
/// Constructed by [`IndexedFile::from_parse_result`], which copies most fields
/// straight from the `FileProcessingResult` it is given.
#[derive(Clone, Debug)]
pub struct IndexedFile {
    /// Relative path of the file. `LiveIndex::files` is keyed by this value
    /// (documented there as forward-slash normalized).
    pub relative_path: String,
    /// Language identifier carried over from the parse result.
    pub language: LanguageId,
    /// File classification carried over from the parse result.
    pub classification: FileClassification,
    /// Raw file bytes stored in memory (LIDX-03 — zero disk I/O on read path).
    pub content: Vec<u8>,
    /// Symbols extracted by the parser.
    pub symbols: Vec<SymbolRecord>,
    /// Parse outcome, translated from the parse result's `FileOutcome`.
    pub parse_status: ParseStatus,
    /// Optional parser diagnostic carried over from the parse result.
    pub parse_diagnostic: Option<ParseDiagnostic>,
    /// Byte length as reported by the parse result.
    /// NOTE(review): presumably equal to `content.len()` — confirm with the parser.
    pub byte_len: u64,
    /// Content hash as reported by the parse result (algorithm is not visible here).
    pub content_hash: String,
    /// Cross-references extracted by xref::extract_references (Phase 4).
    /// Definition-site hits are filtered out in `from_parse_result`.
    pub references: Vec<ReferenceRecord>,
    /// Import alias map for this file: alias -> original name.
    pub alias_map: HashMap<String, String>,
    /// Unix timestamp (seconds) of the file's mtime when it was last indexed.
    /// Used by the freshness guard to detect files that changed on disk after indexing.
    /// Zero means mtime was not recorded (indexed before this field was added).
    /// `from_parse_result` always leaves this at zero; use `with_mtime` to set it.
    pub mtime_secs: u64,
}
56
/// Identifies a single reference within a specific file.
/// Used as a value in `LiveIndex::reverse_index`.
///
/// The pair is a lookup key only: resolve it by fetching the file at
/// `file_path` and indexing into its `references` vector.
#[derive(Clone, Debug)]
pub struct ReferenceLocation {
    /// Relative path of the file containing the reference.
    pub file_path: String,
    /// Index into `IndexedFile::references` for the specific `ReferenceRecord`.
    /// Produced from an `enumerate()` position cast to `u32` when the reverse
    /// index is built.
    pub reference_idx: u32,
}
66
67impl IndexedFile {
68    pub fn from_parse_result(result: FileProcessingResult, content: Vec<u8>) -> Self {
69        let parse_status = match &result.outcome {
70            FileOutcome::Processed => ParseStatus::Parsed,
71            FileOutcome::PartialParse { warning } => ParseStatus::PartialParse {
72                warning: warning.clone(),
73            },
74            FileOutcome::Failed { error } => ParseStatus::Failed {
75                error: error.clone(),
76            },
77        };
78
79        // Destructure the result so we can consume references while borrowing symbols.
80        let FileProcessingResult {
81            relative_path,
82            language,
83            classification,
84            outcome: _,
85            parse_diagnostic,
86            symbols,
87            byte_len,
88            content_hash,
89            references: raw_references,
90            alias_map,
91        } = result;
92
93        // Build a set of symbol byte ranges so we can filter definition-site hits
94        // (Pitfall 1: a reference whose byte_range exactly matches a symbol's byte_range
95        // is the definition itself — not a usage site).
96        let symbol_byte_ranges: std::collections::HashSet<(u32, u32)> =
97            symbols.iter().map(|s| s.byte_range).collect();
98
99        // Assign enclosing_symbol_index for each reference and skip definition sites.
100        let references: Vec<ReferenceRecord> = raw_references
101            .into_iter()
102            .filter(|r| !symbol_byte_ranges.contains(&r.byte_range))
103            .map(|mut r| {
104                if r.enclosing_symbol_index.is_none() {
105                    r.enclosing_symbol_index = find_enclosing_symbol(&symbols, r.line_range.0);
106                }
107                r
108            })
109            .collect();
110
111        IndexedFile {
112            relative_path,
113            language,
114            classification,
115            content,
116            symbols,
117            parse_status,
118            parse_diagnostic,
119            byte_len,
120            content_hash,
121            references,
122            alias_map,
123            mtime_secs: 0,
124        }
125    }
126
127    /// Set the mtime recorded at index time. Call after `from_parse_result` for
128    /// callers that have the file metadata available.
129    pub fn with_mtime(mut self, mtime_secs: u64) -> Self {
130        self.mtime_secs = mtime_secs;
131        self
132    }
133}
134
/// Identity conversion: an `IndexedFile` can be borrowed as itself.
///
/// NOTE(review): presumably exists so generic code bounded on
/// `AsRef<IndexedFile>` accepts both plain and `Arc`-wrapped files — confirm
/// against callers.
impl AsRef<IndexedFile> for IndexedFile {
    fn as_ref(&self) -> &IndexedFile {
        self
    }
}
140
/// Tracks parse failures during index loading for the circuit breaker.
///
/// All recording and query methods take `&self`: the counters are atomics and
/// the failure details sit behind a mutex.
pub struct CircuitBreakerState {
    /// Number of files processed so far (successes + failures).
    total: AtomicUsize,
    /// Number of files whose processing failed.
    failed: AtomicUsize,
    /// Latched to `true` the first time `should_abort` reports a trip.
    tripped: AtomicBool,
    /// Failure threshold as a fraction (e.g., 0.20 = 20%).
    threshold: f64,
    /// First few failure details (path, reason) for summary reporting.
    failure_details: Mutex<Vec<(String, String)>>,
}

impl CircuitBreakerState {
    /// Minimum number of processed files before the breaker is allowed to trip.
    const MIN_FILES_FOR_TRIP: usize = 5;
    /// Maximum number of `(path, reason)` pairs retained.
    const MAX_FAILURE_DETAILS: usize = 5;
    /// Number of retained failures echoed in `summary()`.
    const SUMMARY_FAILURES: usize = 3;

    /// Create with an explicit threshold (for testability).
    pub fn new(threshold: f64) -> Self {
        Self {
            total: AtomicUsize::new(0),
            failed: AtomicUsize::new(0),
            tripped: AtomicBool::new(false),
            threshold,
            failure_details: Mutex::new(Vec::new()),
        }
    }

    /// Create using the `SYMFORGE_CB_THRESHOLD` env var, defaulting to 0.20.
    ///
    /// An unset variable or a value that does not parse as `f64` falls back to
    /// the default.
    pub fn from_env() -> Self {
        let threshold = std::env::var("SYMFORGE_CB_THRESHOLD")
            .ok()
            .and_then(|v| v.parse::<f64>().ok())
            .unwrap_or(0.20);
        Self::new(threshold)
    }

    /// Count one successfully processed file.
    pub fn record_success(&self) {
        self.total.fetch_add(1, Ordering::Relaxed);
    }

    /// Count one failed file and remember its `(path, reason)` if fewer than
    /// `MAX_FAILURE_DETAILS` have been stored so far.
    pub fn record_failure(&self, path: &str, reason: &str) {
        self.total.fetch_add(1, Ordering::Relaxed);
        self.failed.fetch_add(1, Ordering::Relaxed);

        // A poisoned lock only means another thread panicked while holding it;
        // the Vec is still usable, and failure bookkeeping must not turn into
        // a second panic.
        let mut details = self
            .failure_details
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        if details.len() < Self::MAX_FAILURE_DETAILS {
            details.push((path.to_string(), reason.to_string()));
        }
    }

    /// Returns `true` when the failure rate exceeds the threshold.
    ///
    /// IMPORTANT: returns `false` when fewer than 5 files have been processed
    /// (minimum-file guard prevents spurious trips on tiny repos).
    ///
    /// A `true` result also latches the flag reported by `is_tripped`.
    pub fn should_abort(&self) -> bool {
        let total = self.total.load(Ordering::Relaxed);
        if total < Self::MIN_FILES_FOR_TRIP {
            return false;
        }
        let failed = self.failed.load(Ordering::Relaxed);
        let rate = failed as f64 / total as f64;
        if rate > self.threshold {
            self.tripped.store(true, Ordering::Relaxed);
            true
        } else {
            false
        }
    }

    /// Whether a previous `should_abort` call reported a trip.
    pub fn is_tripped(&self) -> bool {
        self.tripped.load(Ordering::Relaxed)
    }

    /// One-line summary plus top failure details.
    ///
    /// Percentages are whole numbers: the failure rate is truncated using
    /// integer arithmetic and the threshold is rounded, so binary floating
    /// point error cannot shave a point off an exact value (e.g. `0.29 * 100.0`
    /// is `28.999…` as an `f64`).
    pub fn summary(&self) -> String {
        let total = self.total.load(Ordering::Relaxed);
        let failed = self.failed.load(Ordering::Relaxed);
        let rate = if total > 0 {
            failed.saturating_mul(100) / total
        } else {
            0
        };
        let threshold_pct = (self.threshold * 100.0).round() as u32;

        let details = self
            .failure_details
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let top_failures: Vec<String> = details
            .iter()
            .take(Self::SUMMARY_FAILURES)
            .map(|(p, r)| format!("  - {p}: {r}"))
            .collect();

        let mut msg = format!(
            "circuit breaker tripped: {failed}/{total} files failed ({rate}% > {threshold_pct}%)"
        );
        if !top_failures.is_empty() {
            msg.push_str("\nTop failures:\n");
            msg.push_str(&top_failures.join("\n"));
        }
        msg
    }
}
238
/// Overall state of the index.
///
/// `PublishedIndexState::capture` maps these variants onto
/// `PublishedIndexStatus` (the tripped variant becomes `Degraded`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IndexState {
    /// Index was constructed with empty() — no files loaded yet.
    Empty,
    /// NOTE(review): presumably a load is in progress — confirm where this is produced.
    Loading,
    /// Published as `PublishedIndexStatus::Ready`.
    Ready,
    /// The parse-failure circuit breaker tripped. Published as `Degraded`, with
    /// `summary` exposed as `PublishedIndexState::degraded_summary`.
    CircuitBreakerTripped {
        /// Human-readable description of the trip.
        /// NOTE(review): presumably `CircuitBreakerState::summary()` output — confirm.
        summary: String,
    },
}
250
/// Where the current in-memory index contents were sourced from.
///
/// Stored on `LiveIndex::load_source` and copied into every
/// `PublishedIndexState`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IndexLoadSource {
    /// NOTE(review): presumably an index created empty with nothing loaded yet — confirm.
    EmptyBootstrap,
    /// NOTE(review): presumably contents built by scanning and parsing files — confirm.
    FreshLoad,
    /// NOTE(review): presumably contents restored from a persisted snapshot — confirm.
    SnapshotRestore,
}
258
/// Reconciliation status after restoring from a persisted snapshot.
///
/// Stored on `LiveIndex::snapshot_verify_state` and copied into every
/// `PublishedIndexState`. `SharedIndexHandle::mark_snapshot_verify_running` and
/// `mark_snapshot_verify_completed` drive the transitions.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SnapshotVerifyState {
    /// NOTE(review): presumably used when the index was not restored from a snapshot — confirm.
    NotNeeded,
    /// NOTE(review): presumably verification is required but has not started — confirm.
    Pending,
    /// NOTE(review): presumably set by `mark_snapshot_verify_running` — confirm.
    Running,
    /// NOTE(review): presumably set by `mark_snapshot_verify_completed` — confirm.
    Completed,
}
267
/// Compact published status label for handle-level state consumers.
///
/// Derived from `IndexState` in `PublishedIndexState::capture`; the string
/// form is available via `PublishedIndexState::status_label`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum PublishedIndexStatus {
    /// Published for `IndexState::Empty`.
    Empty,
    /// Published for `IndexState::Loading`.
    Loading,
    /// Published for `IndexState::Ready`.
    Ready,
    /// Published for `IndexState::CircuitBreakerTripped`.
    Degraded,
}
276
/// Lightweight published state captured from the live index.
///
/// Built by `PublishedIndexState::capture` and handed out behind an `Arc` by
/// `SharedIndexHandle::published_state`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PublishedIndexState {
    /// Publication counter: 0 for the state captured when the handle is
    /// created, then taken from the handle's `next_generation` counter.
    pub generation: u64,
    /// Compact status derived from the index's `IndexState`.
    pub status: PublishedIndexStatus,
    /// Circuit-breaker summary; `Some` only when `status` is `Degraded`.
    pub degraded_summary: Option<String>,
    /// Copied from the index's health stats.
    pub file_count: usize,
    /// Copied from the index's health stats.
    pub parsed_count: usize,
    /// Copied from the index's health stats.
    pub partial_parse_count: usize,
    /// Copied from the index's health stats.
    pub failed_count: usize,
    /// Copied from the index's health stats.
    pub symbol_count: usize,
    /// Copied from `LiveIndex::loaded_at_system`.
    pub loaded_at_system: SystemTime,
    /// Copied from the index's health stats.
    pub load_duration: Duration,
    /// Copied from `LiveIndex::load_source`.
    pub load_source: IndexLoadSource,
    /// Copied from `LiveIndex::snapshot_verify_state`.
    pub snapshot_verify_state: SnapshotVerifyState,
    /// Copied from `LiveIndex::is_empty`.
    pub is_empty: bool,
    /// Admission tier counts: (Tier1 indexed, Tier2 metadata-only, Tier3 hard-skipped).
    pub tier_counts: (usize, usize, usize),
}
296
/// The in-memory index: file contents and parsed symbols for all discovered files.
///
/// `files` is the primary map; `reverse_index`, `files_by_basename`,
/// `files_by_dir_component` and `trigram_index` are secondary indices derived
/// from it (see `DerivedIndices`).
pub struct LiveIndex {
    /// Keyed by `relative_path` (forward-slash normalized).
    pub(crate) files: HashMap<String, Arc<IndexedFile>>,
    /// NOTE(review): presumably the monotonic counterpart of `loaded_at_system` — confirm.
    pub(crate) loaded_at: Instant,
    /// Wall-clock time when index was last loaded. Used by what_changed tool.
    pub(crate) loaded_at_system: SystemTime,
    /// NOTE(review): presumably how long the most recent load took — confirm.
    pub(crate) load_duration: Duration,
    /// Parse-failure circuit breaker state for this index.
    pub(crate) cb_state: CircuitBreakerState,
    /// True when constructed with empty() and reload() has not been called.
    pub(crate) is_empty: bool,
    /// Provenance for the current live contents.
    pub(crate) load_source: IndexLoadSource,
    /// Snapshot reconciliation status for snapshot-restored indices.
    pub(crate) snapshot_verify_state: SnapshotVerifyState,
    /// Repo-level reverse index: reference name -> all locations in the index.
    /// Updated incrementally on single-file mutations (update_file, remove_file);
    /// rebuilt from scratch on bulk operations (load, reload, snapshot restore).
    pub(crate) reverse_index: HashMap<String, Vec<ReferenceLocation>>,
    /// Secondary path index: lowercase basename -> sorted matching relative paths.
    pub(crate) files_by_basename: HashMap<String, Vec<String>>,
    /// Secondary path index: lowercase directory component -> sorted matching relative paths.
    pub(crate) files_by_dir_component: HashMap<String, Vec<String>>,
    /// Trigram search index for file-level text search acceleration.
    pub(crate) trigram_index: super::trigram::TrigramIndex,
    /// Compiled gitignore patterns loaded at index time. Used by NoisePolicy
    /// to classify files as vendor/generated/ignored noise.
    pub(crate) gitignore: Option<ignore::gitignore::Gitignore>,
    /// Files that were not fully indexed (Tier 2 metadata-only or Tier 3 hard-skipped).
    pub(crate) skipped_files: Vec<SkippedFile>,
}
328
/// Lightweight snapshot of a symbol for pre-update diffing in `analyze_file_impact`.
///
/// Stored in [`SharedIndexHandle::pre_update_symbols`] so the impact tool can
/// compare against the state *before* the watcher or edit tools re-indexed.
///
/// Populated by `SharedIndexHandle::update_file` from the symbols of the entry
/// that is about to be replaced.
#[derive(Clone, Debug)]
pub struct PreUpdateSymbol {
    /// Clone of the symbol's name.
    pub name: String,
    /// The symbol's kind rendered with `to_string()`.
    pub kind: String,
    /// Copy of the symbol's `line_range`.
    pub line_range: (u32, u32),
    /// Copy of the symbol's `byte_range`.
    pub byte_range: (u32, u32),
}
340
/// Central shared handle for the live in-memory index.
///
/// This is intentionally a thin compatibility shell over the current `RwLock<LiveIndex>` so the
/// project can later attach published read snapshots or other state-machine metadata here without
/// another repo-wide alias migration.
///
/// # Lock ordering
///
/// When multiple locks must be held simultaneously, always acquire them in this order to prevent
/// deadlocks:
///
/// 1. `live`
/// 2. `pre_update_symbols`
/// 3. `published_state`
/// 4. `published_repo_outline`
///
/// `git_temporal` is an independently locked side-table and may be acquired in any position
/// relative to the others provided it is **not** held while acquiring `live`.
pub struct SharedIndexHandle {
    /// The authoritative index; every mutation goes through its write lock.
    live: RwLock<LiveIndex>,
    /// Most recently published lightweight state; replaced wholesale on each publish.
    published_state: RwLock<Arc<PublishedIndexState>>,
    /// Most recently published repo outline; replaced wholesale on each publish.
    published_repo_outline: RwLock<Arc<RepoOutlineView>>,
    /// Generation number handed to the next publish. Starts at 1 because the
    /// initial state captured at construction is generation 0.
    next_generation: AtomicU64,
    /// Git temporal intelligence — independently locked side-table with
    /// per-file churn, ownership, and co-change data. Populated asynchronously
    /// after index load/reload completes.
    git_temporal: RwLock<Arc<super::git_temporal::GitTemporalIndex>>,
    /// Pre-update symbol snapshots: saved automatically by `update_file` before
    /// the index entry is replaced. Consumed (take) by `analyze_file_impact` to
    /// compute accurate diffs even when the watcher re-indexes before the hook fires.
    pre_update_symbols: RwLock<HashMap<String, Vec<PreUpdateSymbol>>>,
}
373
/// Write guard that republishes lightweight handle state when mutated data is released.
///
/// Obtained from `SharedIndexHandle::write`. Dereferences to `LiveIndex`;
/// taking a mutable dereference marks the guard dirty, and a dirty guard
/// publishes on drop.
pub struct SharedIndexWriteGuard<'a> {
    /// Handle to publish through when the guard is dropped.
    handle: &'a SharedIndexHandle,
    /// The underlying write lock on the live index.
    guard: RwLockWriteGuard<'a, LiveIndex>,
    /// Set once `deref_mut` has been called, whether or not anything changed.
    dirty: bool,
}
380
381impl SharedIndexHandle {
382    pub fn new(index: LiveIndex) -> Self {
383        let published_state = Arc::new(PublishedIndexState::capture(0, &index));
384        let published_repo_outline = Arc::new(index.capture_repo_outline_view());
385        Self {
386            live: RwLock::new(index),
387            published_state: RwLock::new(published_state),
388            published_repo_outline: RwLock::new(published_repo_outline),
389            next_generation: AtomicU64::new(1),
390            git_temporal: RwLock::new(Arc::new(super::git_temporal::GitTemporalIndex::pending())),
391            pre_update_symbols: RwLock::new(HashMap::new()),
392        }
393    }
394
395    pub fn shared(index: LiveIndex) -> Arc<Self> {
396        Arc::new(Self::new(index))
397    }
398
399    pub fn read(&self) -> RwLockReadGuard<'_, LiveIndex> {
400        self.live.read()
401    }
402
403    pub fn write(&self) -> SharedIndexWriteGuard<'_> {
404        SharedIndexWriteGuard {
405            handle: self,
406            guard: self.live.write(),
407            dirty: false,
408        }
409    }
410
411    pub fn published_state(&self) -> Arc<PublishedIndexState> {
412        self.published_state.read().clone()
413    }
414
415    pub fn published_repo_outline(&self) -> Arc<RepoOutlineView> {
416        self.published_repo_outline.read().clone()
417    }
418
419    pub fn reload(&self, root: &Path) -> anyhow::Result<()> {
420        // Build new index data OUTSIDE the write lock (file I/O + parsing).
421        // Only the final swap acquires the lock, reducing block time from
422        // seconds (full I/O) to milliseconds (in-memory index rebuild).
423        let data = LiveIndex::build_reload_data(root)?;
424        let mut live = self.live.write();
425        live.apply_reload_data(data);
426        self.publish_locked(&live);
427        Ok(())
428    }
429
430    pub fn update_file(&self, path: String, file: IndexedFile) {
431        let mut live = self.live.write();
432        // Capture pre-update symbols so analyze_file_impact can diff correctly
433        // even when the watcher re-indexes before the hook fires.
434        if let Some(existing) = live.get_file(&path) {
435            let snapshot: Vec<PreUpdateSymbol> = existing
436                .symbols
437                .iter()
438                .map(|s| PreUpdateSymbol {
439                    name: s.name.clone(),
440                    kind: s.kind.to_string(),
441                    line_range: s.line_range,
442                    byte_range: s.byte_range,
443                })
444                .collect();
445            self.pre_update_symbols
446                .write()
447                .insert(path.clone(), snapshot);
448        }
449        let path_clone = path.clone();
450        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
451            live.update_file(path, file);
452        }));
453        if let Err(panic_info) = result {
454            let msg = panic_info
455                .downcast_ref::<String>()
456                .map(|s| s.as_str())
457                .or_else(|| panic_info.downcast_ref::<&str>().copied())
458                .unwrap_or("unknown");
459            tracing::error!(
460                "index mutation panicked for '{}': {} — repairing",
461                path_clone,
462                msg
463            );
464            live.repair_file_indices(&path_clone);
465        }
466        self.publish_locked(&live);
467    }
468
469    pub fn add_file(&self, path: String, file: IndexedFile) {
470        let mut live = self.live.write();
471        let path_clone = path.clone();
472        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
473            live.add_file(path, file);
474        }));
475        if let Err(panic_info) = result {
476            let msg = panic_info
477                .downcast_ref::<String>()
478                .map(|s| s.as_str())
479                .or_else(|| panic_info.downcast_ref::<&str>().copied())
480                .unwrap_or("unknown");
481            tracing::error!(
482                "index add panicked for '{}': {} — repairing",
483                path_clone,
484                msg
485            );
486            live.repair_file_indices(&path_clone);
487        }
488        self.publish_locked(&live);
489    }
490
491    pub fn remove_file(&self, path: &str) {
492        let mut live = self.live.write();
493        let path_owned = path.to_string();
494        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
495            live.remove_file(path);
496        }));
497        if let Err(panic_info) = result {
498            let msg = panic_info
499                .downcast_ref::<String>()
500                .map(|s| s.as_str())
501                .or_else(|| panic_info.downcast_ref::<&str>().copied())
502                .unwrap_or("unknown");
503            tracing::error!(
504                "index remove panicked for '{}': {} — repairing",
505                path_owned,
506                msg
507            );
508            live.repair_file_indices(&path_owned);
509        }
510        self.publish_locked(&live);
511    }
512
513    pub fn mark_snapshot_verify_running(&self) {
514        let mut live = self.live.write();
515        live.mark_snapshot_verify_running();
516        self.publish_locked(&live);
517    }
518
519    pub fn mark_snapshot_verify_completed(&self) {
520        let mut live = self.live.write();
521        live.mark_snapshot_verify_completed();
522        self.publish_locked(&live);
523    }
524
525    fn publish_locked(&self, live: &LiveIndex) {
526        let generation = self.next_generation.fetch_add(1, Ordering::Relaxed);
527        let published_state = Arc::new(PublishedIndexState::capture(generation, live));
528        let published_repo_outline = Arc::new(live.capture_repo_outline_view());
529        *self.published_state.write() = published_state;
530        *self.published_repo_outline.write() = published_repo_outline;
531    }
532
533    /// Read the current git temporal index (lock-free Arc clone).
534    pub fn git_temporal(&self) -> Arc<super::git_temporal::GitTemporalIndex> {
535        self.git_temporal.read().clone()
536    }
537
538    /// Take (consume) the pre-update symbol snapshot for a file, if any.
539    ///
540    /// Used by `analyze_file_impact` to get the symbols from *before* the last
541    /// `update_file` call — prevents the watcher race where the index is already
542    /// updated to the post-edit state before the hook fires.
543    pub fn take_pre_update_symbols(&self, path: &str) -> Option<Vec<PreUpdateSymbol>> {
544        self.pre_update_symbols.write().remove(path)
545    }
546
547    /// Atomically replace the git temporal index with a new version.
548    pub fn update_git_temporal(&self, index: super::git_temporal::GitTemporalIndex) {
549        *self.git_temporal.write() = Arc::new(index);
550    }
551}
552
553impl<'a> Deref for SharedIndexWriteGuard<'a> {
554    type Target = LiveIndex;
555
556    fn deref(&self) -> &Self::Target {
557        &self.guard
558    }
559}
560
561impl DerefMut for SharedIndexWriteGuard<'_> {
562    fn deref_mut(&mut self) -> &mut Self::Target {
563        self.dirty = true;
564        &mut self.guard
565    }
566}
567
568impl Drop for SharedIndexWriteGuard<'_> {
569    fn drop(&mut self) {
570        if self.dirty {
571            self.handle.publish_locked(&self.guard);
572        }
573    }
574}
575
/// Thread-safe shared handle to the index.
///
/// Reference-counted pointer to a `SharedIndexHandle`; this is the type
/// `LiveIndex::load` returns.
pub type SharedIndex = Arc<SharedIndexHandle>;
578
579impl PublishedIndexState {
580    fn capture(generation: u64, index: &LiveIndex) -> Self {
581        let (status, degraded_summary) = match index.index_state() {
582            IndexState::Empty => (PublishedIndexStatus::Empty, None),
583            IndexState::Loading => (PublishedIndexStatus::Loading, None),
584            IndexState::Ready => (PublishedIndexStatus::Ready, None),
585            IndexState::CircuitBreakerTripped { summary } => {
586                (PublishedIndexStatus::Degraded, Some(summary))
587            }
588        };
589        let stats = index.health_stats();
590        Self {
591            generation,
592            status,
593            degraded_summary,
594            file_count: stats.file_count,
595            parsed_count: stats.parsed_count,
596            partial_parse_count: stats.partial_parse_count,
597            failed_count: stats.failed_count,
598            symbol_count: stats.symbol_count,
599            loaded_at_system: index.loaded_at_system,
600            load_duration: stats.load_duration,
601            load_source: index.load_source,
602            snapshot_verify_state: index.snapshot_verify_state,
603            is_empty: index.is_empty,
604            tier_counts: stats.tier_counts,
605        }
606    }
607
608    pub fn status_label(&self) -> &'static str {
609        match self.status {
610            PublishedIndexStatus::Empty => "Empty",
611            PublishedIndexStatus::Loading => "Loading",
612            PublishedIndexStatus::Ready => "Ready",
613            PublishedIndexStatus::Degraded => "Degraded",
614        }
615    }
616}
617
/// Secondary indices derived from a single `files` map snapshot.
/// Invariant: these indices are one coherent snapshot derived from exactly
/// the `files` map they are paired with. Grouping them enforces this.
pub(crate) struct DerivedIndices {
    /// Trigram index built by `TrigramIndex::build_from_files`.
    pub trigram_index: super::trigram::TrigramIndex,
    /// Reference name -> locations, built by `build_reverse_index_from_files`.
    pub reverse_index: HashMap<String, Vec<ReferenceLocation>>,
    /// Basename key -> paths; first half of `build_path_indices_from_files`' result.
    pub files_by_basename: HashMap<String, Vec<String>>,
    /// Directory-component key -> paths; second half of `build_path_indices_from_files`' result.
    pub files_by_dir_component: HashMap<String, Vec<String>>,
}
627
628impl DerivedIndices {
629    /// Build all derived indices from a file map. Pure function — no side effects,
630    /// no locks, safe to call from any thread.
631    pub(crate) fn build_from_files(files: &HashMap<String, Arc<IndexedFile>>) -> Self {
632        let (files_by_basename, files_by_dir_component) = build_path_indices_from_files(files);
633        Self {
634            trigram_index: super::trigram::TrigramIndex::build_from_files(files),
635            reverse_index: build_reverse_index_from_files(files),
636            files_by_basename,
637            files_by_dir_component,
638        }
639    }
640}
641
/// Pre-computed reload data built outside any lock.
///
/// Contains everything needed to swap into a `LiveIndex` under the write lock.
/// All derived indices are pre-built so that `apply_reload_data` is pure field
/// assignment (microseconds, not milliseconds).
///
/// # Failure boundaries
///
/// `build_reload_data()` is all-or-nothing and side-effect-free with respect to
/// the live index state. Only `apply_reload_data()` mutates the live state, and
/// it cannot fail — it's pure assignment.
pub(crate) struct ReloadData {
    /// Replacement for `LiveIndex::files`.
    pub files: HashMap<String, Arc<IndexedFile>>,
    /// Replacement for `LiveIndex::cb_state`.
    pub cb_state: CircuitBreakerState,
    /// Replacement for `LiveIndex::load_duration`.
    pub load_duration: Duration,
    /// Replacement for `LiveIndex::gitignore`.
    pub gitignore: Option<ignore::gitignore::Gitignore>,
    /// Secondary indices that must have been derived from `files` above.
    pub derived: DerivedIndices,
    /// Replacement for `LiveIndex::skipped_files`.
    pub skipped_files: Vec<SkippedFile>,
}
661
662/// Build a reverse index from a file map (standalone, no `&self` needed).
663pub(crate) fn build_reverse_index_from_files(
664    files: &HashMap<String, Arc<IndexedFile>>,
665) -> HashMap<String, Vec<ReferenceLocation>> {
666    let mut idx: HashMap<String, Vec<ReferenceLocation>> = HashMap::new();
667    for (file_path, indexed_file) in files {
668        for (reference_idx, reference) in indexed_file.references.iter().enumerate() {
669            idx.entry(reference.name.clone())
670                .or_default()
671                .push(ReferenceLocation {
672                    file_path: file_path.clone(),
673                    reference_idx: reference_idx as u32,
674                });
675        }
676    }
677    idx
678}
679
680/// Build path indices (basename + dir component) from a file map.
681pub(crate) fn build_path_indices_from_files(
682    files: &HashMap<String, Arc<IndexedFile>>,
683) -> (HashMap<String, Vec<String>>, HashMap<String, Vec<String>>) {
684    let mut by_basename: HashMap<String, Vec<String>> = HashMap::new();
685    let mut by_dir_component: HashMap<String, Vec<String>> = HashMap::new();
686    for path in files.keys() {
687        if let Some(basename) = basename_key(path) {
688            insert_sorted_unique(by_basename.entry(basename).or_default(), path);
689        }
690        for component in dir_component_keys(path) {
691            insert_sorted_unique(by_dir_component.entry(component).or_default(), path);
692        }
693    }
694    (by_basename, by_dir_component)
695}
696
697impl LiveIndex {
698    /// Load all source files under `root` into memory in parallel (Rayon), parse them,
699    /// and return a `SharedIndex`.
700    ///
701    /// This function is **synchronous** — it must complete before the async tokio runtime
702    /// needs the index. Rayon handles internal parallelism.
703    pub fn load(root: &Path) -> anyhow::Result<SharedIndex> {
704        let start = Instant::now();
705
706        info!("LiveIndex::load starting at {:?}", root);
707
708        // 1. Discover ALL files (not just known-language ones) so the admission gate
709        //    can classify every file, including those with denylisted or unknown extensions.
710        let all_entries = discovery::discover_all_files(root)?;
711        info!(
712            "discovered {} total files (pre-admission)",
713            all_entries.len()
714        );
715
716        // 2. Run admission gate in parallel.
717        //    For files that pass Tier-1 initially (size/extension checks), we read content
718        //    and re-run the binary sniff before committing to parse.
719        //    Files that are non-Normal skip reading entirely.
720        use crate::discovery::classify_admission;
721        use crate::domain::index::{AdmissionTier, SkippedFile};
722
723        enum AdmissionOutcome {
724            Parse {
725                relative_path: String,
726                language: crate::domain::LanguageId,
727                classification: crate::domain::FileClassification,
728                bytes: Vec<u8>,
729                mtime_secs: u64,
730            },
731            Skip(SkippedFile),
732        }
733
734        let outcomes: Vec<AdmissionOutcome> = all_entries
735            .par_iter()
736            .filter_map(|entry| {
737                // Phase 1: size + extension check (no I/O beyond what the walk gave us).
738                let decision_pre = classify_admission(
739                    &entry.absolute_path,
740                    entry.file_size,
741                    None, // no content yet
742                );
743
744                match decision_pre.tier {
745                    AdmissionTier::HardSkip | AdmissionTier::MetadataOnly => {
746                        // No need to read content — already decided.
747                        let sf = SkippedFile {
748                            path: entry.relative_path.clone(),
749                            size: entry.file_size,
750                            extension: entry
751                                .absolute_path
752                                .extension()
753                                .and_then(|e| e.to_str())
754                                .map(|s| s.to_string()),
755                            decision: decision_pre,
756                        };
757                        return Some(AdmissionOutcome::Skip(sf));
758                    }
759                    AdmissionTier::Normal => {}
760                }
761
762                // Phase 2: we tentatively have Tier-1. If the file has no recognized
763                // language, we cannot parse it — skip it as metadata-only.
764                let language = match &entry.language {
765                    Some(lang) => lang.clone(),
766                    None => {
767                        // Unknown extension, not on denylist, under size limit.
768                        // Read content to do binary sniff, then store as skipped.
769                        let bytes = match std::fs::read(&entry.absolute_path) {
770                            Ok(b) => b,
771                            Err(e) => {
772                                warn!("failed to read {:?}: {}", entry.absolute_path, e);
773                                return None;
774                            }
775                        };
776                        let decision_post =
777                            classify_admission(&entry.absolute_path, entry.file_size, Some(&bytes));
778                        let sf = SkippedFile {
779                            path: entry.relative_path.clone(),
780                            size: entry.file_size,
781                            extension: entry
782                                .absolute_path
783                                .extension()
784                                .and_then(|e| e.to_str())
785                                .map(|s| s.to_string()),
786                            decision: decision_post,
787                        };
788                        return Some(AdmissionOutcome::Skip(sf));
789                    }
790                };
791
792                // Phase 3: read content and do binary sniff before passing to parser.
793                let bytes = match std::fs::read(&entry.absolute_path) {
794                    Ok(b) => b,
795                    Err(e) => {
796                        warn!("failed to read {:?}: {}", entry.absolute_path, e);
797                        return None;
798                    }
799                };
800                let mtime_secs = std::fs::metadata(&entry.absolute_path)
801                    .and_then(|m| m.modified())
802                    .ok()
803                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
804                    .map(|d| d.as_secs())
805                    .unwrap_or(0);
806
807                let decision_post =
808                    classify_admission(&entry.absolute_path, entry.file_size, Some(&bytes));
809
810                match decision_post.tier {
811                    AdmissionTier::HardSkip | AdmissionTier::MetadataOnly => {
812                        // Binary sniff reclassified this file — do NOT parse.
813                        let sf = SkippedFile {
814                            path: entry.relative_path.clone(),
815                            size: entry.file_size,
816                            extension: entry
817                                .absolute_path
818                                .extension()
819                                .and_then(|e| e.to_str())
820                                .map(|s| s.to_string()),
821                            decision: decision_post,
822                        };
823                        Some(AdmissionOutcome::Skip(sf))
824                    }
825                    AdmissionTier::Normal => Some(AdmissionOutcome::Parse {
826                        relative_path: entry.relative_path.clone(),
827                        language,
828                        classification: entry.classification,
829                        bytes,
830                        mtime_secs,
831                    }),
832                }
833            })
834            .collect();
835
836        // 3. Split outcomes into parse candidates and skipped files.
837        let mut skipped_files: Vec<SkippedFile> = Vec::new();
838        let mut to_parse: Vec<(
839            String,
840            crate::domain::LanguageId,
841            crate::domain::FileClassification,
842            Vec<u8>,
843            u64, // mtime_secs
844        )> = Vec::new();
845
846        for outcome in outcomes {
847            match outcome {
848                AdmissionOutcome::Skip(sf) => skipped_files.push(sf),
849                AdmissionOutcome::Parse {
850                    relative_path,
851                    language,
852                    classification,
853                    bytes,
854                    mtime_secs,
855                } => {
856                    to_parse.push((relative_path, language, classification, bytes, mtime_secs));
857                }
858            }
859        }
860
861        info!(
862            "admission gate: {} to parse, {} skipped",
863            to_parse.len(),
864            skipped_files.len()
865        );
866
867        // 4. Parse all admitted files in parallel via Rayon.
868        let mut parse_results: Vec<(String, IndexedFile)> = to_parse
869            .par_iter()
870            .map(
871                |(relative_path, language, classification, bytes, mtime_secs)| {
872                    let result = parsing::process_file_with_classification(
873                        relative_path,
874                        bytes,
875                        language.clone(),
876                        *classification,
877                    );
878                    let indexed = IndexedFile::from_parse_result(result, bytes.clone())
879                        .with_mtime(*mtime_secs);
880                    (relative_path.clone(), indexed)
881                },
882            )
883            .collect();
884
885        // 5. Sort by path for deterministic circuit-breaker evaluation order.
886        parse_results.sort_by(|a, b| a.0.cmp(&b.0));
887
888        // 6. Build HashMap sequentially, running circuit breaker checks.
889        let cb_state = CircuitBreakerState::from_env();
890        let mut files: HashMap<String, Arc<IndexedFile>> =
891            HashMap::with_capacity(parse_results.len());
892
893        let mut cb_tripped = false;
894        for (path, indexed_file) in parse_results {
895            match &indexed_file.parse_status {
896                ParseStatus::Failed { error } => {
897                    cb_state.record_failure(&path, error);
898                }
899                _ => {
900                    cb_state.record_success();
901                }
902            }
903
904            if cb_state.should_abort() {
905                let summary = cb_state.summary();
906                error!("{}", summary);
907                cb_tripped = true;
908                // Still insert the file before breaking
909                files.insert(path, Arc::new(indexed_file));
910                break;
911            }
912
913            files.insert(path, Arc::new(indexed_file));
914        }
915
916        if cb_tripped {
917            cb_state.tripped.store(true, Ordering::Relaxed);
918        }
919
920        let load_duration = start.elapsed();
921        info!(
922            "LiveIndex loaded: {} files, {} symbols, {} skipped, {:?}",
923            files.len(),
924            files.values().map(|f| f.symbols.len()).sum::<usize>(),
925            skipped_files.len(),
926            load_duration
927        );
928
929        let trigram_index = super::trigram::TrigramIndex::build_from_files(&files);
930        let gitignore = discovery::load_gitignore(root);
931
932        let mut index = LiveIndex {
933            files,
934            loaded_at: Instant::now(),
935            loaded_at_system: SystemTime::now(),
936            load_duration,
937            cb_state,
938            is_empty: false,
939            load_source: IndexLoadSource::FreshLoad,
940            snapshot_verify_state: SnapshotVerifyState::NotNeeded,
941            reverse_index: HashMap::new(),
942            files_by_basename: HashMap::new(),
943            files_by_dir_component: HashMap::new(),
944            trigram_index,
945            gitignore,
946            skipped_files,
947        };
948        index.rebuild_reverse_index();
949        index.rebuild_path_indices();
950
951        Ok(SharedIndexHandle::shared(index))
952    }
953
954    /// Create an empty `SharedIndex` with no files loaded.
955    ///
956    /// Used when `SYMFORGE_AUTO_INDEX=false`. The caller must call `reload()` to populate it.
957    /// Returns `IndexState::Empty` and `is_ready() == false` until reloaded.
958    pub fn empty() -> SharedIndex {
959        let index = LiveIndex {
960            files: HashMap::new(),
961            loaded_at: Instant::now(),
962            loaded_at_system: SystemTime::now(),
963            load_duration: Duration::ZERO,
964            cb_state: CircuitBreakerState::new(0.20),
965            is_empty: true,
966            load_source: IndexLoadSource::EmptyBootstrap,
967            snapshot_verify_state: SnapshotVerifyState::NotNeeded,
968            reverse_index: HashMap::new(),
969            files_by_basename: HashMap::new(),
970            files_by_dir_component: HashMap::new(),
971            trigram_index: super::trigram::TrigramIndex::new(),
972            gitignore: None,
973            skipped_files: Vec::new(),
974        };
975        SharedIndexHandle::shared(index)
976    }
977
978    pub fn add_skipped_file(&mut self, sf: SkippedFile) {
979        self.skipped_files.push(sf);
980    }
981
982    pub fn skipped_files(&self) -> &[SkippedFile] {
983        &self.skipped_files
984    }
985
986    /// Returns (tier1_count, tier2_count, tier3_count).
987    /// Tier 1 = number of indexed files (self.files.len()).
988    /// Tier 2/3 = from skipped_files.
989    pub fn tier_counts(&self) -> (usize, usize, usize) {
990        let tier1 = self.files.len();
991        let mut tier2 = 0;
992        let mut tier3 = 0;
993        for sf in &self.skipped_files {
994            match sf.tier() {
995                AdmissionTier::MetadataOnly => tier2 += 1,
996                AdmissionTier::HardSkip => tier3 += 1,
997                AdmissionTier::Normal => {} // shouldn't happen
998            }
999        }
1000        (tier1, tier2, tier3)
1001    }
1002
1003    /// Build reload data without holding any lock. Performs all file I/O and
1004    /// parsing via Rayon. The returned `ReloadData` is applied under the write
1005    /// lock via `apply_reload_data` — reducing lock hold time from seconds to
1006    /// milliseconds.
1007    pub(crate) fn build_reload_data(root: &Path) -> anyhow::Result<ReloadData> {
1008        let start = Instant::now();
1009
1010        info!("LiveIndex::build_reload_data starting at {:?}", root);
1011
1012        if !root.exists() {
1013            anyhow::bail!(
1014                "discovery error: root path does not exist: {}",
1015                root.display()
1016            );
1017        }
1018
1019        // 1. Discover all source files
1020        let discovered = discovery::discover_files(root)?;
1021        info!("discovered {} source files", discovered.len());
1022
1023        // 2. Parse all files in parallel via Rayon
1024        let parse_results: Vec<(String, IndexedFile)> = discovered
1025            .par_iter()
1026            .filter_map(|df| {
1027                let bytes = match std::fs::read(&df.absolute_path) {
1028                    Ok(b) => b,
1029                    Err(e) => {
1030                        warn!("failed to read {:?}: {}", df.absolute_path, e);
1031                        return None;
1032                    }
1033                };
1034
1035                let mtime_secs = std::fs::metadata(&df.absolute_path)
1036                    .and_then(|m| m.modified())
1037                    .ok()
1038                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
1039                    .map(|d| d.as_secs())
1040                    .unwrap_or(0);
1041
1042                let result = parsing::process_file_with_classification(
1043                    &df.relative_path,
1044                    &bytes,
1045                    df.language.clone(),
1046                    df.classification,
1047                );
1048                let indexed = IndexedFile::from_parse_result(result, bytes).with_mtime(mtime_secs);
1049                Some((df.relative_path.clone(), indexed))
1050            })
1051            .collect();
1052
1053        // 3. Build new file map with fresh circuit breaker
1054        let new_cb = CircuitBreakerState::from_env();
1055        let mut new_files: HashMap<String, Arc<IndexedFile>> =
1056            HashMap::with_capacity(parse_results.len());
1057
1058        let mut cb_tripped = false;
1059        for (path, indexed_file) in parse_results {
1060            match &indexed_file.parse_status {
1061                ParseStatus::Failed { error } => {
1062                    new_cb.record_failure(&path, error);
1063                }
1064                _ => {
1065                    new_cb.record_success();
1066                }
1067            }
1068
1069            if new_cb.should_abort() {
1070                let summary = new_cb.summary();
1071                error!("{}", summary);
1072                cb_tripped = true;
1073                new_files.insert(path, Arc::new(indexed_file));
1074                break;
1075            }
1076
1077            new_files.insert(path, Arc::new(indexed_file));
1078        }
1079
1080        if cb_tripped {
1081            new_cb.tripped.store(true, Ordering::Relaxed);
1082        }
1083
1084        let load_duration = start.elapsed();
1085        info!(
1086            "LiveIndex::build_reload_data done: {} files, {} symbols, {:?}",
1087            new_files.len(),
1088            new_files.values().map(|f| f.symbols.len()).sum::<usize>(),
1089            load_duration
1090        );
1091
1092        // Pre-build all derived indices outside any lock.
1093        let derived = DerivedIndices::build_from_files(&new_files);
1094
1095        Ok(ReloadData {
1096            files: new_files,
1097            cb_state: new_cb,
1098            load_duration,
1099            gitignore: discovery::load_gitignore(root),
1100            derived,
1101            skipped_files: Vec::new(),
1102        })
1103    }
1104
1105    /// Apply pre-built reload data under the write lock. Pure field assignment —
1106    /// all derived indices are pre-built in `ReloadData`, so this takes
1107    /// microseconds instead of milliseconds. Cannot fail.
1108    pub(crate) fn apply_reload_data(&mut self, data: ReloadData) {
1109        self.files = data.files;
1110        self.loaded_at = Instant::now();
1111        self.loaded_at_system = SystemTime::now();
1112        self.load_duration = data.load_duration;
1113        self.cb_state = data.cb_state;
1114        self.is_empty = false;
1115        self.load_source = IndexLoadSource::FreshLoad;
1116        self.snapshot_verify_state = SnapshotVerifyState::NotNeeded;
1117        self.trigram_index = data.derived.trigram_index;
1118        self.reverse_index = data.derived.reverse_index;
1119        self.files_by_basename = data.derived.files_by_basename;
1120        self.files_by_dir_component = data.derived.files_by_dir_component;
1121        self.gitignore = data.gitignore;
1122        self.skipped_files = data.skipped_files;
1123    }
1124
1125    /// Replaces all files, resets circuit breaker, and updates timestamps.
1126    /// On success sets `is_empty = false`. On error the index remains in its previous state
1127    /// (but partial results may have been loaded).
1128    ///
1129    /// NOTE: This method does all I/O under `&mut self`. Prefer calling
1130    /// `build_reload_data` outside the lock and then `apply_reload_data` under
1131    /// the lock when called via `SharedIndexHandle::reload`.
1132    pub fn reload(&mut self, root: &Path) -> anyhow::Result<()> {
1133        let data = Self::build_reload_data(root)?;
1134        self.apply_reload_data(data);
1135        Ok(())
1136    }
1137
1138    /// Insert or replace a single file in the index without a full reload.
1139    ///
1140    /// Updates `loaded_at_system` to reflect the mutation time.
1141    /// If the file already exists, its entry is replaced atomically.
1142    pub fn update_file(&mut self, path: String, file: IndexedFile) {
1143        // Capture old reference names BEFORE replacing the file, so we can
1144        // clean up stale reverse index entries after the insert.
1145        let old_ref_names: Vec<String> = self
1146            .files
1147            .get(&path)
1148            .map(|f| f.references.iter().map(|r| r.name.clone()).collect())
1149            .unwrap_or_default();
1150        let had_existing = !old_ref_names.is_empty() || self.files.contains_key(&path);
1151
1152        // SAFETY: Insert the new file into the primary store FIRST.
1153        // This ensures the file is always present in `self.files` even if
1154        // auxiliary index updates panic (e.g., from concurrent access or
1155        // gitignore assertion failures). Auxiliary indices may become
1156        // temporarily stale, but the file won't vanish from the index.
1157        self.files.insert(path.clone(), Arc::new(file));
1158
1159        // Clean up old auxiliary indices using captured state.
1160        if had_existing {
1161            self.remove_path_indices_for_path(&path);
1162        }
1163        // Remove old reverse index entries using the captured old reference names
1164        // (not the new file's references, which are already in self.files).
1165        for name in &old_ref_names {
1166            if let Some(locs) = self.reverse_index.get_mut(name) {
1167                locs.retain(|loc| loc.file_path != path);
1168                if locs.is_empty() {
1169                    self.reverse_index.remove(name);
1170                }
1171            }
1172        }
1173        self.trigram_index
1174            .update_file(&path, &self.files[&path].content);
1175        self.insert_reverse_index_for_path(&path);
1176        self.insert_path_indices_for_path(&path);
1177        self.is_empty = false;
1178        self.loaded_at_system = SystemTime::now();
1179    }
1180
1181    /// Insert a new file into the index (alias for `update_file`).
1182    ///
1183    /// Semantically identical to `update_file` — if the file already exists
1184    /// it is replaced. The name `add_file` is provided for clarity at call sites
1185    /// where the caller knows the file is new.
1186    pub fn add_file(&mut self, path: String, file: IndexedFile) {
1187        self.update_file(path, file);
1188    }
1189
1190    /// Remove a single file from the index by its relative path.
1191    ///
1192    /// If the path is not present, this is a no-op (no timestamp update).
1193    /// If the path is found and removed, `loaded_at_system` is updated.
1194    pub fn remove_file(&mut self, path: &str) {
1195        self.remove_reverse_index_for_path(path);
1196        if self.files.remove(path).is_some() {
1197            self.trigram_index.remove_file(path);
1198            self.remove_path_indices_for_path(path);
1199            self.loaded_at_system = SystemTime::now();
1200        }
1201    }
1202
1203    /// Remove reverse index entries for a single file path.
1204    /// Must be called BEFORE removing the file from `self.files`.
1205    fn remove_reverse_index_for_path(&mut self, path: &str) {
1206        if let Some(file) = self.files.get(path) {
1207            let names: Vec<String> = file.references.iter().map(|r| r.name.clone()).collect();
1208            for name in names {
1209                if let Some(locs) = self.reverse_index.get_mut(&name) {
1210                    locs.retain(|loc| loc.file_path != path);
1211                    if locs.is_empty() {
1212                        self.reverse_index.remove(&name);
1213                    }
1214                }
1215            }
1216        }
1217    }
1218
1219    /// Insert reverse index entries for a single file path.
1220    /// Must be called AFTER inserting the file into `self.files`.
1221    fn insert_reverse_index_for_path(&mut self, path: &str) {
1222        if let Some(file) = self.files.get(path) {
1223            for (reference_idx, reference) in file.references.iter().enumerate() {
1224                self.reverse_index
1225                    .entry(reference.name.clone())
1226                    .or_default()
1227                    .push(ReferenceLocation {
1228                        file_path: path.to_string(),
1229                        reference_idx: reference_idx as u32,
1230                    });
1231            }
1232        }
1233    }
1234
1235    /// Rebuild `reverse_index` from scratch using current `self.files`.
1236    ///
1237    /// Used by incremental callers (load, snapshot restore, tests).
1238    /// For bulk reload, prefer `DerivedIndices::build_from_files` outside the lock.
1239    pub(crate) fn rebuild_reverse_index(&mut self) {
1240        self.reverse_index = build_reverse_index_from_files(&self.files);
1241    }
1242
1243    /// Rebuild path indices (basename + dir component) from current `self.files`.
1244    ///
1245    /// Used by incremental callers (load, snapshot restore, tests).
1246    /// For bulk reload, prefer `DerivedIndices::build_from_files` outside the lock.
1247    pub(crate) fn rebuild_path_indices(&mut self) {
1248        let (by_basename, by_dir_component) = build_path_indices_from_files(&self.files);
1249        self.files_by_basename = by_basename;
1250        self.files_by_dir_component = by_dir_component;
1251    }
1252
1253    /// Repair all auxiliary indices for a single file path after a panic or
1254    /// detected corruption.
1255    ///
1256    /// Performs a thorough O(reverse_index_size) scan to remove ALL stale entries
1257    /// pointing to this path, then rebuilds from the primary store. Only called
1258    /// on the exceptional panic-recovery path, never during normal operations.
1259    pub(crate) fn repair_file_indices(&mut self, path: &str) {
1260        // 1. Thorough reverse index cleanup: scan ALL entries, not just those
1261        //    matching current file references (old refs may differ from new).
1262        self.reverse_index.retain(|_name, locs| {
1263            locs.retain(|loc| loc.file_path != path);
1264            !locs.is_empty()
1265        });
1266
1267        // 2. Remove path indices (basename + dir component).
1268        self.remove_path_indices_for_path(path);
1269
1270        // 3. Rebuild from primary store state.
1271        if self.files.contains_key(path) {
1272            if let Some(file) = self.files.get(path) {
1273                self.trigram_index.update_file(path, &file.content);
1274            }
1275            self.insert_reverse_index_for_path(path);
1276            self.insert_path_indices_for_path(path);
1277        } else {
1278            self.trigram_index.remove_file(path);
1279        }
1280
1281        tracing::info!("repaired auxiliary indices for '{path}'");
1282    }
1283
1284    fn insert_path_indices_for_path(&mut self, path: &str) {
1285        if let Some(basename) = basename_key(path) {
1286            insert_sorted_unique(self.files_by_basename.entry(basename).or_default(), path);
1287        }
1288
1289        for component in dir_component_keys(path) {
1290            insert_sorted_unique(
1291                self.files_by_dir_component.entry(component).or_default(),
1292                path,
1293            );
1294        }
1295    }
1296
1297    fn remove_path_indices_for_path(&mut self, path: &str) {
1298        if let Some(basename) = basename_key(path)
1299            && let Some(paths) = self.files_by_basename.get_mut(&basename)
1300        {
1301            remove_sorted_path(paths, path);
1302            if paths.is_empty() {
1303                self.files_by_basename.remove(&basename);
1304            }
1305        }
1306
1307        for component in dir_component_keys(path) {
1308            if let Some(paths) = self.files_by_dir_component.get_mut(&component) {
1309                remove_sorted_path(paths, path);
1310                if paths.is_empty() {
1311                    self.files_by_dir_component.remove(&component);
1312                }
1313            }
1314        }
1315    }
1316
1317    /// Returns where the current in-memory contents came from.
1318    pub fn load_source(&self) -> IndexLoadSource {
1319        self.load_source
1320    }
1321
1322    /// Returns the current snapshot reconciliation state.
1323    pub fn snapshot_verify_state(&self) -> SnapshotVerifyState {
1324        self.snapshot_verify_state
1325    }
1326
1327    pub(crate) fn mark_snapshot_verify_running(&mut self) {
1328        if self.load_source == IndexLoadSource::SnapshotRestore {
1329            self.snapshot_verify_state = SnapshotVerifyState::Running;
1330        }
1331    }
1332
1333    pub(crate) fn mark_snapshot_verify_completed(&mut self) {
1334        if self.load_source == IndexLoadSource::SnapshotRestore {
1335            self.snapshot_verify_state = SnapshotVerifyState::Completed;
1336        }
1337    }
1338}
1339
/// Lookup key for a path's final component: the file name, ASCII-lowercased.
///
/// Returns `None` when the path has no file name (for example an empty path
/// or one ending in `..`) or when the file name is not valid UTF-8.
fn basename_key(path: &str) -> Option<String> {
    let file_name = Path::new(path).file_name()?.to_str()?;
    Some(file_name.to_ascii_lowercase())
}
1346
/// Lookup keys for the directory part of a path.
///
/// The path is split on both `/` and `\`, empty segments are ignored, and the
/// final segment (the file name) is excluded. What remains is ASCII-lowercased,
/// de-duplicated, and returned in sorted order. A path with at most one
/// segment has no directory part and yields an empty list.
fn dir_component_keys(path: &str) -> Vec<String> {
    let segments: Vec<&str> = path
        .split(['/', '\\'])
        .filter(|segment| !segment.is_empty())
        .collect();

    // Everything before the last segment is a directory; no segments at all
    // means there is nothing to index.
    let Some((_file_name, directories)) = segments.split_last() else {
        return Vec::new();
    };

    let mut keys: Vec<String> = directories
        .iter()
        .map(|directory| directory.to_ascii_lowercase())
        .collect();
    // Sorting first puts equal keys next to each other, so `dedup` removes
    // every duplicate.
    keys.sort_unstable();
    keys.dedup();
    keys
}
1367
/// Insert `path` into an already-sorted list, keeping it sorted.
///
/// If the list already contains `path`, it is left unchanged. The list must be
/// sorted on entry, because the insertion point is found by binary search.
fn insert_sorted_unique(paths: &mut Vec<String>, path: &str) {
    let lookup = paths.binary_search_by(|existing| existing.as_str().cmp(path));
    if let Err(slot) = lookup {
        paths.insert(slot, path.to_string());
    }
}
1374
/// Remove `path` from an already-sorted list, if it is there.
///
/// A path that is not in the list is ignored. The list must be sorted on
/// entry, because the element is located by binary search.
fn remove_sorted_path(paths: &mut Vec<String>, path: &str) {
    match paths.binary_search_by(|existing| existing.as_str().cmp(path)) {
        Ok(found_at) => {
            paths.remove(found_at);
        }
        Err(_) => {}
    }
}
1380
1381#[cfg(test)]
1382mod tests {
1383    use super::*;
1384    use crate::domain::{
1385        FileOutcome, LanguageId, ReferenceKind, ReferenceRecord, SymbolKind, SymbolRecord,
1386    };
1387    use std::fs;
1388    use tempfile::TempDir;
1389
1390    fn dummy_symbol() -> SymbolRecord {
1391        let byte_range = (0, 10);
1392        SymbolRecord {
1393            name: "foo".to_string(),
1394            kind: SymbolKind::Function,
1395            depth: 0,
1396            sort_order: 0,
1397            byte_range,
1398            item_byte_range: Some(byte_range),
1399            line_range: (0, 1),
1400            doc_byte_range: None,
1401        }
1402    }
1403
1404    fn make_result(outcome: FileOutcome, symbols: Vec<SymbolRecord>) -> FileProcessingResult {
1405        FileProcessingResult {
1406            relative_path: "test.rs".to_string(),
1407            language: LanguageId::Rust,
1408            classification: crate::domain::FileClassification::for_code_path("test.rs"),
1409            outcome,
1410            parse_diagnostic: None,
1411            symbols,
1412            byte_len: 42,
1413            content_hash: "abc123".to_string(),
1414            references: vec![],
1415            alias_map: std::collections::HashMap::new(),
1416        }
1417    }
1418
1419    // --- IndexedFile::from_parse_result ---
1420
1421    #[test]
1422    fn test_indexed_file_maps_processed_status() {
1423        let result = make_result(FileOutcome::Processed, vec![dummy_symbol()]);
1424        let indexed = IndexedFile::from_parse_result(result, b"fn foo() {}".to_vec());
1425        assert_eq!(indexed.parse_status, ParseStatus::Parsed);
1426        assert_eq!(indexed.symbols.len(), 1);
1427    }
1428
1429    #[test]
1430    fn test_indexed_file_maps_partial_parse_keeps_symbols() {
1431        let result = make_result(
1432            FileOutcome::PartialParse {
1433                warning: "syntax error".to_string(),
1434            },
1435            vec![dummy_symbol()],
1436        );
1437        let indexed = IndexedFile::from_parse_result(result, b"fn bad(".to_vec());
1438        assert!(matches!(
1439            indexed.parse_status,
1440            ParseStatus::PartialParse { .. }
1441        ));
1442        assert_eq!(
1443            indexed.symbols.len(),
1444            1,
1445            "symbols kept even on partial parse"
1446        );
1447    }
1448
1449    #[test]
1450    fn test_indexed_file_maps_failed_status_empty_symbols_content_preserved() {
1451        let result = make_result(
1452            FileOutcome::Failed {
1453                error: "parse failed".to_string(),
1454            },
1455            vec![],
1456        );
1457        let content = b"some content bytes".to_vec();
1458        let indexed = IndexedFile::from_parse_result(result, content.clone());
1459        assert!(matches!(indexed.parse_status, ParseStatus::Failed { .. }));
1460        assert!(indexed.symbols.is_empty(), "failed parse has no symbols");
1461        assert_eq!(
1462            indexed.content, content,
1463            "content bytes stored even on failure"
1464        );
1465    }
1466
1467    // --- CircuitBreakerState ---
1468
1469    #[test]
1470    fn test_circuit_breaker_does_not_trip_at_20pct_of_10_files() {
1471        // 20% of 10 = exactly threshold — NOT exceeded
1472        let cb = CircuitBreakerState::new(0.20);
1473        for _ in 0..8 {
1474            cb.record_success();
1475        }
1476        for i in 0..2 {
1477            cb.record_failure(&format!("file{i}.rs"), "error");
1478        }
1479        assert!(
1480            !cb.should_abort(),
1481            "2/10 = 20% should NOT trip (threshold not exceeded)"
1482        );
1483    }
1484
1485    #[test]
1486    fn test_circuit_breaker_trips_at_30pct_of_10_files() {
1487        // 30% > 20% threshold — SHOULD trip
1488        let cb = CircuitBreakerState::new(0.20);
1489        for _ in 0..7 {
1490            cb.record_success();
1491        }
1492        for i in 0..3 {
1493            cb.record_failure(&format!("file{i}.rs"), "error");
1494        }
1495        assert!(cb.should_abort(), "3/10 = 30% should trip");
1496    }
1497
1498    #[test]
1499    fn test_circuit_breaker_does_not_trip_on_tiny_repos() {
1500        // Fewer than 5 files processed — minimum-file guard must prevent tripping
1501        let cb = CircuitBreakerState::new(0.20);
1502        cb.record_failure("a.rs", "err");
1503        cb.record_failure("b.rs", "err");
1504        cb.record_failure("c.rs", "err");
1505        // 3 total, all failed — but < 5 minimum threshold
1506        assert!(
1507            !cb.should_abort(),
1508            "< 5 files processed: circuit breaker must not trip"
1509        );
1510    }
1511
1512    #[test]
1513    fn test_circuit_breaker_threshold_configurable() {
1514        // Use a strict threshold of 0.10 (10%)
1515        let cb = CircuitBreakerState::new(0.10);
1516        for _ in 0..9 {
1517            cb.record_success();
1518        }
1519        cb.record_failure("file.rs", "error");
1520        // 1/10 = 10% = threshold, NOT exceeded
1521        assert!(!cb.should_abort(), "10% == threshold, not exceeded");
1522
1523        // Now one more failure puts it at 2/11 ~ 18.2% > 10% — but we add 1 more success first
1524        let cb2 = CircuitBreakerState::new(0.10);
1525        for _ in 0..8 {
1526            cb2.record_success();
1527        }
1528        for i in 0..2 {
1529            cb2.record_failure(&format!("file{i}.rs"), "error");
1530        }
1531        // 2/10 = 20% > 10% threshold
1532        assert!(cb2.should_abort(), "20% > 10% threshold should trip");
1533    }
1534
1535    // --- LiveIndex::load ---
1536
1537    fn write_file(dir: &Path, name: &str, content: &str) {
1538        let path = dir.join(name);
1539        if let Some(p) = path.parent() {
1540            fs::create_dir_all(p).unwrap();
1541        }
1542        fs::write(path, content).unwrap();
1543    }
1544
1545    #[test]
1546    fn test_live_index_load_valid_files_produces_ready_state() {
1547        let tmp = TempDir::new().unwrap();
1548        write_file(tmp.path(), "a.rs", "fn alpha() {}");
1549        write_file(tmp.path(), "b.py", "def beta(): pass");
1550        write_file(tmp.path(), "c.js", "function gamma() {}");
1551        write_file(tmp.path(), "d.ts", "function delta(): void {}");
1552        write_file(tmp.path(), "e.go", "package main\nfunc epsilon() {}");
1553
1554        let shared = LiveIndex::load(tmp.path()).unwrap();
1555        let index = shared.read();
1556        assert!(
1557            !index.cb_state.is_tripped(),
1558            "valid files should not trip circuit breaker"
1559        );
1560        assert_eq!(index.file_count(), 5);
1561        assert_eq!(index.load_source(), IndexLoadSource::FreshLoad);
1562        assert_eq!(
1563            index.snapshot_verify_state(),
1564            SnapshotVerifyState::NotNeeded
1565        );
1566    }
1567
1568    #[test]
1569    fn test_live_index_load_circuit_breaker_not_tripped_with_all_languages() {
1570        // All 16 languages now parse successfully (tree-sitter 0.26 + ABI-compatible grammars).
1571        // A mix of language files should not trip the circuit breaker.
1572        let tmp = TempDir::new().unwrap();
1573        write_file(tmp.path(), "a.rs", "fn alpha() {}");
1574        write_file(tmp.path(), "b.py", "def beta(): pass");
1575        write_file(tmp.path(), "c.js", "function gamma() {}");
1576        // Swift, PHP, Perl now parse successfully — CB should not trip
1577        write_file(tmp.path(), "x.swift", "class A {}");
1578        write_file(tmp.path(), "y.php", "<?php class B {}");
1579        write_file(tmp.path(), "z.pl", "sub greet { print \"hi\"; }");
1580
1581        let shared = LiveIndex::load(tmp.path()).unwrap();
1582        let index = shared.read();
1583        assert!(
1584            !index.cb_state.is_tripped(),
1585            "all-parseable files should not trip circuit breaker"
1586        );
1587    }
1588
1589    #[test]
1590    fn test_live_index_file_count() {
1591        let tmp = TempDir::new().unwrap();
1592        write_file(tmp.path(), "a.rs", "fn a() {}");
1593        write_file(tmp.path(), "b.rs", "fn b() {}");
1594        write_file(tmp.path(), "c.rs", "fn c() {}");
1595
1596        let shared = LiveIndex::load(tmp.path()).unwrap();
1597        let index = shared.read();
1598        assert_eq!(index.file_count(), 3);
1599    }
1600
1601    #[test]
1602    fn test_live_index_symbol_count() {
1603        let tmp = TempDir::new().unwrap();
1604        write_file(tmp.path(), "a.rs", "fn foo() {}\nfn bar() {}");
1605        write_file(tmp.path(), "b.rs", "fn baz() {}");
1606
1607        let shared = LiveIndex::load(tmp.path()).unwrap();
1608        let index = shared.read();
1609        // a.rs: 2 symbols, b.rs: 1 symbol → total 3
1610        assert_eq!(index.symbol_count(), 3);
1611    }
1612
1613    // --- LiveIndex::empty() and reload() ---
1614
1615    #[test]
1616    fn test_live_index_empty_has_zero_files() {
1617        let shared = LiveIndex::empty();
1618        let index = shared.read();
1619        assert_eq!(index.file_count(), 0);
1620        assert_eq!(index.load_source(), IndexLoadSource::EmptyBootstrap);
1621        assert_eq!(
1622            index.snapshot_verify_state(),
1623            SnapshotVerifyState::NotNeeded
1624        );
1625    }
1626
1627    #[test]
1628    fn test_shared_index_handle_preserves_read_write_access() {
1629        let shared = LiveIndex::empty();
1630        {
1631            let mut live = shared.write();
1632            live.add_file(
1633                "src/new.rs".to_string(),
1634                make_indexed_file_for_mutation("src/new.rs"),
1635            );
1636        }
1637
1638        let index = shared.read();
1639        assert!(index.get_file("src/new.rs").is_some());
1640    }
1641
1642    #[test]
1643    fn test_shared_index_handle_published_state_tracks_generation_and_counts() {
1644        let shared = LiveIndex::empty();
1645        let initial = shared.published_state();
1646        assert_eq!(initial.generation, 0);
1647        assert_eq!(initial.status, PublishedIndexStatus::Empty);
1648        assert_eq!(initial.degraded_summary, None);
1649        assert_eq!(initial.file_count, 0);
1650        assert_eq!(initial.parsed_count, 0);
1651        assert_eq!(initial.partial_parse_count, 0);
1652        assert_eq!(initial.failed_count, 0);
1653        assert_eq!(initial.load_source, IndexLoadSource::EmptyBootstrap);
1654
1655        shared.add_file(
1656            "src/new.rs".to_string(),
1657            make_indexed_file_for_mutation("src/new.rs"),
1658        );
1659        let after_add = shared.published_state();
1660        assert_eq!(after_add.generation, 1);
1661        assert_eq!(after_add.status, PublishedIndexStatus::Ready);
1662        assert_eq!(after_add.degraded_summary, None);
1663        assert_eq!(after_add.file_count, 1);
1664        assert_eq!(after_add.parsed_count, 1);
1665        assert_eq!(after_add.partial_parse_count, 0);
1666        assert_eq!(after_add.failed_count, 0);
1667        assert_eq!(after_add.symbol_count, 1);
1668
1669        shared.remove_file("src/new.rs");
1670        let after_remove = shared.published_state();
1671        assert_eq!(after_remove.generation, 2);
1672        assert_eq!(after_remove.status, PublishedIndexStatus::Ready);
1673        assert_eq!(after_remove.degraded_summary, None);
1674        assert_eq!(after_remove.file_count, 0);
1675        assert_eq!(after_remove.symbol_count, 0);
1676    }
1677
1678    #[test]
1679    fn test_shared_index_handle_write_guard_publishes_on_drop() {
1680        let shared = LiveIndex::empty();
1681
1682        {
1683            let mut live = shared.write();
1684            live.add_file(
1685                "src/new.rs".to_string(),
1686                make_indexed_file_for_mutation("src/new.rs"),
1687            );
1688        }
1689
1690        let after_add = shared.published_state();
1691        assert_eq!(after_add.generation, 1);
1692        assert_eq!(after_add.status, PublishedIndexStatus::Ready);
1693        assert_eq!(after_add.degraded_summary, None);
1694        assert_eq!(after_add.file_count, 1);
1695
1696        {
1697            let mut live = shared.write();
1698            live.remove_file("src/new.rs");
1699        }
1700
1701        let after_remove = shared.published_state();
1702        assert_eq!(after_remove.generation, 2);
1703        assert_eq!(after_remove.status, PublishedIndexStatus::Ready);
1704        assert_eq!(after_remove.degraded_summary, None);
1705        assert_eq!(after_remove.file_count, 0);
1706    }
1707
1708    #[test]
1709    fn test_shared_index_handle_published_state_tracks_verify_transitions() {
1710        let mut live = make_empty_live_index();
1711        live.is_empty = false;
1712        live.load_source = IndexLoadSource::SnapshotRestore;
1713        live.snapshot_verify_state = SnapshotVerifyState::Pending;
1714        let shared = SharedIndexHandle::shared(live);
1715
1716        shared.mark_snapshot_verify_running();
1717        let running = shared.published_state();
1718        assert_eq!(running.generation, 1);
1719        assert_eq!(running.status, PublishedIndexStatus::Ready);
1720        assert_eq!(running.degraded_summary, None);
1721        assert_eq!(running.snapshot_verify_state, SnapshotVerifyState::Running);
1722
1723        shared.mark_snapshot_verify_completed();
1724        let completed = shared.published_state();
1725        assert_eq!(completed.generation, 2);
1726        assert_eq!(
1727            completed.snapshot_verify_state,
1728            SnapshotVerifyState::Completed
1729        );
1730    }
1731
1732    #[test]
1733    fn test_shared_index_handle_published_state_captures_degraded_summary() {
1734        let mut live = make_empty_live_index();
1735        live.is_empty = false;
1736        for _ in 0..3 {
1737            live.cb_state.record_failure("src/bad.rs", "parse failure");
1738        }
1739        for _ in 0..7 {
1740            live.cb_state.record_success();
1741        }
1742        assert!(live.cb_state.should_abort(), "circuit breaker should trip");
1743        let shared = SharedIndexHandle::shared(live);
1744
1745        let published = shared.published_state();
1746        assert_eq!(published.status, PublishedIndexStatus::Degraded);
1747        assert!(
1748            published
1749                .degraded_summary
1750                .as_deref()
1751                .is_some_and(|summary| summary.contains("circuit breaker tripped")),
1752            "expected degraded summary, got {:?}",
1753            published.degraded_summary
1754        );
1755    }
1756
1757    #[test]
1758    fn test_shared_index_handle_published_repo_outline_tracks_mutations() {
1759        let shared = LiveIndex::empty();
1760
1761        let initial = shared.published_repo_outline();
1762        assert_eq!(initial.total_files, 0);
1763        assert_eq!(initial.total_symbols, 0);
1764        assert!(initial.files.is_empty());
1765
1766        shared.add_file(
1767            "src/main.rs".to_string(),
1768            make_indexed_file_for_mutation("src/main.rs"),
1769        );
1770        let after_add = shared.published_repo_outline();
1771        assert_eq!(after_add.total_files, 1);
1772        assert_eq!(after_add.total_symbols, 1);
1773        assert_eq!(after_add.files[0].relative_path, "src/main.rs");
1774
1775        {
1776            let mut live = shared.write();
1777            live.remove_file("src/main.rs");
1778        }
1779        let after_remove = shared.published_repo_outline();
1780        assert_eq!(after_remove.total_files, 0);
1781        assert_eq!(after_remove.total_symbols, 0);
1782        assert!(after_remove.files.is_empty());
1783    }
1784
1785    #[test]
1786    fn test_live_index_empty_returns_empty_state() {
1787        let shared = LiveIndex::empty();
1788        let index = shared.read();
1789        assert_eq!(index.index_state(), IndexState::Empty);
1790    }
1791
1792    #[test]
1793    fn test_live_index_empty_is_not_ready() {
1794        let shared = LiveIndex::empty();
1795        let index = shared.read();
1796        assert!(!index.is_ready(), "empty index should not be ready");
1797    }
1798
1799    #[test]
1800    fn test_live_index_reload_loads_files_and_becomes_ready() {
1801        let tmp = TempDir::new().unwrap();
1802        write_file(tmp.path(), "a.rs", "fn alpha() {}");
1803        write_file(tmp.path(), "b.rs", "fn beta() {}");
1804
1805        let shared = LiveIndex::empty();
1806        {
1807            let mut index = shared.write();
1808            index.reload(tmp.path()).expect("reload should succeed");
1809        }
1810        let index = shared.read();
1811        assert_eq!(index.file_count(), 2);
1812        assert!(index.is_ready(), "after reload should be ready");
1813        assert_eq!(index.index_state(), IndexState::Ready);
1814        assert_eq!(index.load_source(), IndexLoadSource::FreshLoad);
1815        assert_eq!(
1816            index.snapshot_verify_state(),
1817            SnapshotVerifyState::NotNeeded
1818        );
1819    }
1820
1821    #[test]
1822    fn test_live_index_reload_invalid_root_returns_error() {
1823        let shared = LiveIndex::empty();
1824        let mut index = shared.write();
1825        let result = index.reload(Path::new("/nonexistent/path/that/does/not/exist"));
1826        assert!(
1827            result.is_err(),
1828            "reload on invalid root should return error"
1829        );
1830    }
1831
1832    #[test]
1833    fn test_live_index_loaded_at_system_is_recent() {
1834        use std::time::SystemTime;
1835        let before = SystemTime::now();
1836        let shared = LiveIndex::empty();
1837        let index = shared.read();
1838        let after = SystemTime::now();
1839        let ts = index.loaded_at_system();
1840        assert!(
1841            ts >= before,
1842            "loaded_at_system should be >= before creation"
1843        );
1844        assert!(ts <= after, "loaded_at_system should be <= after creation");
1845    }
1846
1847    #[test]
1848    fn test_concurrent_readers_no_deadlock() {
1849        use std::thread;
1850
1851        let tmp = TempDir::new().unwrap();
1852        write_file(tmp.path(), "a.rs", "fn foo() {}");
1853        write_file(tmp.path(), "b.rs", "fn bar() {}");
1854        write_file(tmp.path(), "c.rs", "fn baz() {}");
1855
1856        let shared = LiveIndex::load(tmp.path()).unwrap();
1857
1858        let handles: Vec<_> = (0..8)
1859            .map(|_| {
1860                let shared_clone = Arc::clone(&shared);
1861                thread::spawn(move || {
1862                    let index = shared_clone.read();
1863                    let _ = index.file_count();
1864                    let _ = index.symbol_count();
1865                })
1866            })
1867            .collect();
1868
1869        for h in handles {
1870            h.join().expect("reader thread should not panic");
1871        }
1872    }
1873
1874    // --- LiveIndex mutation methods ---
1875
1876    fn make_indexed_file_for_mutation(path: &str) -> IndexedFile {
1877        IndexedFile {
1878            relative_path: path.to_string(),
1879            language: LanguageId::Rust,
1880            classification: crate::domain::FileClassification::for_code_path(path),
1881            content: b"fn test() {}".to_vec(),
1882            symbols: vec![dummy_symbol()],
1883            parse_status: ParseStatus::Parsed,
1884            parse_diagnostic: None,
1885            byte_len: 12,
1886            content_hash: "abc123".to_string(),
1887            references: vec![],
1888            alias_map: std::collections::HashMap::new(),
1889            mtime_secs: 0,
1890        }
1891    }
1892
1893    fn make_empty_live_index() -> LiveIndex {
1894        LiveIndex {
1895            files: HashMap::new(),
1896            loaded_at: Instant::now(),
1897            loaded_at_system: SystemTime::now(),
1898            load_duration: Duration::ZERO,
1899            cb_state: CircuitBreakerState::new(0.20),
1900            is_empty: false,
1901            load_source: IndexLoadSource::FreshLoad,
1902            snapshot_verify_state: SnapshotVerifyState::NotNeeded,
1903            reverse_index: HashMap::new(),
1904            files_by_basename: HashMap::new(),
1905            files_by_dir_component: HashMap::new(),
1906            trigram_index: crate::live_index::trigram::TrigramIndex::new(),
1907            gitignore: None,
1908            skipped_files: Vec::new(),
1909        }
1910    }
1911
1912    #[test]
1913    fn test_live_index_load_builds_path_indices() {
1914        let dir = TempDir::new().expect("failed to create tempdir");
1915        fs::create_dir_all(dir.path().join("src")).expect("failed to create src dir");
1916        fs::create_dir_all(dir.path().join("tests")).expect("failed to create tests dir");
1917        write_file(dir.path(), "src/lib.rs", "pub fn lib_fn() {}");
1918        write_file(dir.path(), "tests/lib.rs", "fn test_lib() {}");
1919
1920        let shared = LiveIndex::load(dir.path()).expect("LiveIndex::load failed");
1921        let index = shared.read();
1922
1923        assert_eq!(
1924            index.files_by_basename.get("lib.rs"),
1925            Some(&vec!["src/lib.rs".to_string(), "tests/lib.rs".to_string()])
1926        );
1927        assert_eq!(
1928            index.files_by_dir_component.get("src"),
1929            Some(&vec!["src/lib.rs".to_string()])
1930        );
1931        assert_eq!(
1932            index.files_by_dir_component.get("tests"),
1933            Some(&vec!["tests/lib.rs".to_string()])
1934        );
1935    }
1936
1937    #[test]
1938    fn test_live_index_reload_rebuilds_path_indices() {
1939        let dir = TempDir::new().expect("failed to create tempdir");
1940        fs::create_dir_all(dir.path().join("src")).expect("failed to create src dir");
1941        write_file(dir.path(), "src/alpha.rs", "fn alpha() {}");
1942
1943        let shared = LiveIndex::load(dir.path()).expect("LiveIndex::load failed");
1944
1945        fs::remove_file(dir.path().join("src/alpha.rs")).expect("failed to remove alpha");
1946        fs::create_dir_all(dir.path().join("tests")).expect("failed to create tests dir");
1947        write_file(dir.path(), "tests/beta.rs", "fn beta() {}");
1948
1949        {
1950            let mut index = shared.write();
1951            index.reload(dir.path()).expect("reload should succeed");
1952        }
1953
1954        let index = shared.read();
1955        assert!(!index.files_by_basename.contains_key("alpha.rs"));
1956        assert_eq!(
1957            index.files_by_basename.get("beta.rs"),
1958            Some(&vec!["tests/beta.rs".to_string()])
1959        );
1960        assert!(!index.files_by_dir_component.contains_key("src"));
1961        assert_eq!(
1962            index.files_by_dir_component.get("tests"),
1963            Some(&vec!["tests/beta.rs".to_string()])
1964        );
1965    }
1966
1967    #[test]
1968    fn test_dir_component_keys_deduplicate_and_accept_backslashes() {
1969        assert_eq!(
1970            dir_component_keys("src\\live_index\\src\\store.rs"),
1971            vec!["live_index".to_string(), "src".to_string()]
1972        );
1973    }
1974
1975    #[test]
1976    fn test_update_file_inserts_and_updates_timestamp() {
1977        let mut index = make_empty_live_index();
1978        let before = SystemTime::now();
1979        let file = make_indexed_file_for_mutation("src/new.rs");
1980        index.update_file("src/new.rs".to_string(), file);
1981        let after = SystemTime::now();
1982
1983        assert!(
1984            index.get_file("src/new.rs").is_some(),
1985            "file should be inserted"
1986        );
1987        assert_eq!(
1988            index.files_by_basename.get("new.rs"),
1989            Some(&vec!["src/new.rs".to_string()])
1990        );
1991        assert_eq!(
1992            index.files_by_dir_component.get("src"),
1993            Some(&vec!["src/new.rs".to_string()])
1994        );
1995        let ts = index.loaded_at_system;
1996        assert!(ts >= before, "loaded_at_system should be >= before update");
1997        assert!(ts <= after, "loaded_at_system should be <= after update");
1998    }
1999
2000    #[test]
2001    fn test_update_file_replaces_existing() {
2002        let mut index = make_empty_live_index();
2003        let file1 = IndexedFile {
2004            relative_path: "src/foo.rs".to_string(),
2005            language: LanguageId::Rust,
2006            classification: crate::domain::FileClassification::for_code_path("src/foo.rs"),
2007            content: b"fn old() {}".to_vec(),
2008            symbols: vec![],
2009            parse_status: ParseStatus::Parsed,
2010            parse_diagnostic: None,
2011            byte_len: 11,
2012            content_hash: "old_hash".to_string(),
2013            references: vec![],
2014            alias_map: std::collections::HashMap::new(),
2015            mtime_secs: 0,
2016        };
2017        index.update_file("src/foo.rs".to_string(), file1);
2018
2019        let file2 = IndexedFile {
2020            relative_path: "src/foo.rs".to_string(),
2021            language: LanguageId::Rust,
2022            classification: crate::domain::FileClassification::for_code_path("src/foo.rs"),
2023            content: b"fn new() {}".to_vec(),
2024            symbols: vec![dummy_symbol()],
2025            parse_status: ParseStatus::Parsed,
2026            parse_diagnostic: None,
2027            byte_len: 11,
2028            content_hash: "new_hash".to_string(),
2029            references: vec![],
2030            alias_map: std::collections::HashMap::new(),
2031            mtime_secs: 0,
2032        };
2033        index.update_file("src/foo.rs".to_string(), file2);
2034
2035        let retrieved = index.get_file("src/foo.rs").unwrap();
2036        assert_eq!(
2037            retrieved.content_hash, "new_hash",
2038            "should have replaced the file"
2039        );
2040        assert_eq!(index.file_count(), 1, "should still have exactly 1 file");
2041        assert_eq!(
2042            index.files_by_basename.get("foo.rs"),
2043            Some(&vec!["src/foo.rs".to_string()])
2044        );
2045        assert_eq!(
2046            index.files_by_dir_component.get("src"),
2047            Some(&vec!["src/foo.rs".to_string()])
2048        );
2049    }
2050
2051    #[test]
2052    fn test_add_file_inserts_new() {
2053        let mut index = make_empty_live_index();
2054        assert_eq!(index.file_count(), 0);
2055
2056        let file = make_indexed_file_for_mutation("src/new.rs");
2057        index.add_file("src/new.rs".to_string(), file);
2058
2059        assert_eq!(
2060            index.file_count(),
2061            1,
2062            "file count should increase by 1 after add_file"
2063        );
2064        assert!(index.get_file("src/new.rs").is_some());
2065    }
2066
2067    #[test]
2068    fn test_remove_file_removes_existing() {
2069        let mut index = make_empty_live_index();
2070        let file = make_indexed_file_for_mutation("src/to_delete.rs");
2071        index.update_file("src/to_delete.rs".to_string(), file);
2072        assert_eq!(index.file_count(), 1);
2073
2074        index.remove_file("src/to_delete.rs");
2075        assert!(
2076            index.get_file("src/to_delete.rs").is_none(),
2077            "file should be removed"
2078        );
2079        assert_eq!(index.file_count(), 0);
2080        assert!(!index.files_by_basename.contains_key("to_delete.rs"));
2081        assert!(!index.files_by_dir_component.contains_key("src"));
2082    }
2083
2084    #[test]
2085    fn test_remove_file_nonexistent_is_noop() {
2086        let mut index = make_empty_live_index();
2087        // Set a known timestamp
2088        let known_ts = index.loaded_at_system;
2089        // Small sleep to ensure any timestamp update would be different
2090        std::thread::sleep(Duration::from_millis(5));
2091
2092        index.remove_file("nonexistent.rs");
2093
2094        assert_eq!(
2095            index.loaded_at_system, known_ts,
2096            "loaded_at_system must NOT change when removing non-existent file"
2097        );
2098    }
2099
2100    #[test]
2101    fn test_file_count_after_mutations() {
2102        let mut index = make_empty_live_index();
2103        assert_eq!(index.file_count(), 0);
2104
2105        index.add_file("a.rs".to_string(), make_indexed_file_for_mutation("a.rs"));
2106        assert_eq!(index.file_count(), 1);
2107
2108        index.add_file("b.rs".to_string(), make_indexed_file_for_mutation("b.rs"));
2109        assert_eq!(index.file_count(), 2);
2110
2111        index.update_file("a.rs".to_string(), make_indexed_file_for_mutation("a.rs"));
2112        assert_eq!(index.file_count(), 2, "update does not add a new entry");
2113
2114        index.remove_file("a.rs");
2115        assert_eq!(index.file_count(), 1);
2116
2117        index.remove_file("nonexistent.rs");
2118        assert_eq!(
2119            index.file_count(),
2120            1,
2121            "removing nonexistent does not change count"
2122        );
2123    }
2124
2125    // --- Cross-reference fields and reverse index ---
2126
2127    fn make_ref(name: &str, kind: ReferenceKind, line: u32) -> ReferenceRecord {
2128        ReferenceRecord {
2129            name: name.to_string(),
2130            qualified_name: None,
2131            kind,
2132            byte_range: (0, 1),
2133            line_range: (line, line),
2134            enclosing_symbol_index: None,
2135        }
2136    }
2137
2138    fn make_indexed_file_with_refs(path: &str, refs: Vec<ReferenceRecord>) -> IndexedFile {
2139        IndexedFile {
2140            relative_path: path.to_string(),
2141            language: LanguageId::Rust,
2142            classification: crate::domain::FileClassification::for_code_path(path),
2143            content: b"fn test() {}".to_vec(),
2144            symbols: vec![],
2145            parse_status: ParseStatus::Parsed,
2146            parse_diagnostic: None,
2147            byte_len: 12,
2148            content_hash: "abc".to_string(),
2149            references: refs,
2150            alias_map: std::collections::HashMap::new(),
2151            mtime_secs: 0,
2152        }
2153    }
2154
2155    #[test]
2156    fn test_indexed_file_from_parse_result_transfers_refs_and_alias_map() {
2157        use std::collections::HashMap;
2158        let mut alias_map = HashMap::new();
2159        alias_map.insert("Map".to_string(), "HashMap".to_string());
2160        let refs = vec![make_ref("foo", ReferenceKind::Call, 1)];
2161
2162        let result = FileProcessingResult {
2163            relative_path: "test.rs".to_string(),
2164            language: LanguageId::Rust,
2165            classification: crate::domain::FileClassification::for_code_path("test.rs"),
2166            outcome: FileOutcome::Processed,
2167            parse_diagnostic: None,
2168            symbols: vec![],
2169            byte_len: 0,
2170            content_hash: "abc".to_string(),
2171            references: refs.clone(),
2172            alias_map: alias_map.clone(),
2173        };
2174
2175        let indexed = IndexedFile::from_parse_result(result, vec![]);
2176        assert_eq!(indexed.references.len(), 1);
2177        assert_eq!(indexed.references[0].name, "foo");
2178        assert_eq!(
2179            indexed.alias_map.get("Map").map(|s| s.as_str()),
2180            Some("HashMap")
2181        );
2182    }
2183
2184    #[test]
2185    fn test_rebuild_reverse_index_builds_name_to_locations() {
2186        let mut index = make_empty_live_index();
2187
2188        let refs_a = vec![
2189            make_ref("process", ReferenceKind::Call, 5),
2190            make_ref("load", ReferenceKind::Call, 10),
2191        ];
2192        let refs_b = vec![make_ref("process", ReferenceKind::Call, 3)];
2193
2194        index.add_file(
2195            "a.rs".to_string(),
2196            make_indexed_file_with_refs("a.rs", refs_a),
2197        );
2198        index.add_file(
2199            "b.rs".to_string(),
2200            make_indexed_file_with_refs("b.rs", refs_b),
2201        );
2202
2203        // process appears in both files
2204        let locs = index
2205            .reverse_index
2206            .get("process")
2207            .expect("process should be in reverse index");
2208        assert_eq!(locs.len(), 2, "process referenced in 2 files");
2209
2210        // load appears only in a.rs
2211        let locs_load = index
2212            .reverse_index
2213            .get("load")
2214            .expect("load should be in reverse index");
2215        assert_eq!(locs_load.len(), 1);
2216        assert_eq!(locs_load[0].file_path, "a.rs");
2217        assert_eq!(locs_load[0].reference_idx, 1);
2218    }
2219
2220    #[test]
2221    fn test_rebuild_reverse_index_consistent_after_update_file() {
2222        let mut index = make_empty_live_index();
2223
2224        let refs_old = vec![make_ref("old_func", ReferenceKind::Call, 1)];
2225        index.add_file(
2226            "src.rs".to_string(),
2227            make_indexed_file_with_refs("src.rs", refs_old),
2228        );
2229        assert!(index.reverse_index.contains_key("old_func"));
2230
2231        let refs_new = vec![make_ref("new_func", ReferenceKind::Call, 1)];
2232        index.update_file(
2233            "src.rs".to_string(),
2234            make_indexed_file_with_refs("src.rs", refs_new),
2235        );
2236
2237        assert!(
2238            !index.reverse_index.contains_key("old_func"),
2239            "stale entry should be gone"
2240        );
2241        assert!(
2242            index.reverse_index.contains_key("new_func"),
2243            "new entry should be present"
2244        );
2245    }
2246
2247    #[test]
2248    fn test_rebuild_reverse_index_excludes_removed_file() {
2249        let mut index = make_empty_live_index();
2250
2251        let refs = vec![make_ref("target_fn", ReferenceKind::Call, 2)];
2252        index.add_file(
2253            "will_delete.rs".to_string(),
2254            make_indexed_file_with_refs("will_delete.rs", refs),
2255        );
2256        assert!(index.reverse_index.contains_key("target_fn"));
2257
2258        index.remove_file("will_delete.rs");
2259        assert!(
2260            !index.reverse_index.contains_key("target_fn"),
2261            "removed file's refs should be gone"
2262        );
2263    }
2264
2265    #[test]
2266    fn test_reference_location_fields() {
2267        let loc = ReferenceLocation {
2268            file_path: "src/main.rs".to_string(),
2269            reference_idx: 3,
2270        };
2271        assert_eq!(loc.file_path, "src/main.rs");
2272        assert_eq!(loc.reference_idx, 3);
2273    }
2274
2275    #[test]
2276    fn test_empty_live_index_has_empty_reverse_index() {
2277        let index = make_empty_live_index();
2278        assert!(
2279            index.reverse_index.is_empty(),
2280            "fresh index should have empty reverse index"
2281        );
2282    }
2283
2284    #[test]
2285    fn test_incremental_reverse_index_matches_full_rebuild() {
2286        let mut index = make_empty_live_index();
2287
2288        // Add two files with overlapping references
2289        let refs_a = vec![
2290            make_ref("shared_fn", ReferenceKind::Call, 1),
2291            make_ref("only_a", ReferenceKind::Call, 5),
2292        ];
2293        let refs_b = vec![
2294            make_ref("shared_fn", ReferenceKind::Call, 2),
2295            make_ref("only_b", ReferenceKind::Call, 8),
2296        ];
2297        index.add_file(
2298            "a.rs".to_string(),
2299            make_indexed_file_with_refs("a.rs", refs_a),
2300        );
2301        index.add_file(
2302            "b.rs".to_string(),
2303            make_indexed_file_with_refs("b.rs", refs_b),
2304        );
2305
2306        // Update a.rs with new references (triggers incremental update)
2307        let refs_a_new = vec![
2308            make_ref("shared_fn", ReferenceKind::Call, 1),
2309            make_ref("replaced_a", ReferenceKind::Call, 10),
2310        ];
2311        index.update_file(
2312            "a.rs".to_string(),
2313            make_indexed_file_with_refs("a.rs", refs_a_new),
2314        );
2315
2316        // Snapshot the incremental result
2317        let incremental: HashMap<String, Vec<(String, u32)>> = index
2318            .reverse_index
2319            .iter()
2320            .map(|(k, v)| {
2321                let mut locs: Vec<(String, u32)> = v
2322                    .iter()
2323                    .map(|l| (l.file_path.clone(), l.reference_idx))
2324                    .collect();
2325                locs.sort();
2326                (k.clone(), locs)
2327            })
2328            .collect();
2329
2330        // Now do a full rebuild and compare
2331        index.rebuild_reverse_index();
2332        let full_rebuild: HashMap<String, Vec<(String, u32)>> = index
2333            .reverse_index
2334            .iter()
2335            .map(|(k, v)| {
2336                let mut locs: Vec<(String, u32)> = v
2337                    .iter()
2338                    .map(|l| (l.file_path.clone(), l.reference_idx))
2339                    .collect();
2340                locs.sort();
2341                (k.clone(), locs)
2342            })
2343            .collect();
2344
2345        assert_eq!(
2346            incremental, full_rebuild,
2347            "incremental update should produce same result as full rebuild"
2348        );
2349
2350        // Verify specific expectations
2351        assert!(
2352            !index.reverse_index.contains_key("only_a"),
2353            "only_a should be gone after update"
2354        );
2355        assert!(
2356            index.reverse_index.contains_key("replaced_a"),
2357            "replaced_a should be present"
2358        );
2359        assert!(
2360            index.reverse_index.contains_key("only_b"),
2361            "only_b should still be present from b.rs"
2362        );
2363        let shared = index.reverse_index.get("shared_fn").unwrap();
2364        assert_eq!(shared.len(), 2, "shared_fn still referenced in both files");
2365    }
2366
2367    #[test]
2368    fn test_incremental_reverse_index_remove() {
2369        let mut index = make_empty_live_index();
2370
2371        let refs_a = vec![
2372            make_ref("common", ReferenceKind::Call, 1),
2373            make_ref("unique_a", ReferenceKind::Call, 3),
2374        ];
2375        let refs_b = vec![
2376            make_ref("common", ReferenceKind::Call, 2),
2377            make_ref("unique_b", ReferenceKind::Call, 4),
2378        ];
2379        index.add_file(
2380            "a.rs".to_string(),
2381            make_indexed_file_with_refs("a.rs", refs_a),
2382        );
2383        index.add_file(
2384            "b.rs".to_string(),
2385            make_indexed_file_with_refs("b.rs", refs_b),
2386        );
2387
2388        // Remove a.rs
2389        index.remove_file("a.rs");
2390
2391        // unique_a should be gone entirely
2392        assert!(
2393            !index.reverse_index.contains_key("unique_a"),
2394            "unique_a should be removed with a.rs"
2395        );
2396        // unique_b should remain
2397        assert!(
2398            index.reverse_index.contains_key("unique_b"),
2399            "unique_b should survive"
2400        );
2401        // common should only have b.rs
2402        let common_locs = index
2403            .reverse_index
2404            .get("common")
2405            .expect("common should still exist from b.rs");
2406        assert_eq!(common_locs.len(), 1);
2407        assert_eq!(common_locs[0].file_path, "b.rs");
2408
2409        // Verify incremental matches full rebuild
2410        let incremental: HashMap<String, Vec<(String, u32)>> = index
2411            .reverse_index
2412            .iter()
2413            .map(|(k, v)| {
2414                let mut locs: Vec<(String, u32)> = v
2415                    .iter()
2416                    .map(|l| (l.file_path.clone(), l.reference_idx))
2417                    .collect();
2418                locs.sort();
2419                (k.clone(), locs)
2420            })
2421            .collect();
2422
2423        index.rebuild_reverse_index();
2424        let full_rebuild: HashMap<String, Vec<(String, u32)>> = index
2425            .reverse_index
2426            .iter()
2427            .map(|(k, v)| {
2428                let mut locs: Vec<(String, u32)> = v
2429                    .iter()
2430                    .map(|l| (l.file_path.clone(), l.reference_idx))
2431                    .collect();
2432                locs.sort();
2433                (k.clone(), locs)
2434            })
2435            .collect();
2436
2437        assert_eq!(
2438            incremental, full_rebuild,
2439            "incremental remove should match full rebuild"
2440        );
2441    }
2442
2443    // --- CR2: circuit-breaker determinism test ---
2444
2445    #[test]
2446    fn test_circuit_breaker_deterministic_after_sort() {
2447        // Simulate what the store does: collect parse results from par_iter (nondeterministic
2448        // order), sort by path, then walk sequentially recording success/failure.
2449        // We verify that two different orderings of the same results, after sorting,
2450        // produce the same trip point.
2451
2452        // 10 entries: "a/f00.rs"–"a/f04.rs" succeed, "a/f05.rs"–"a/f09.rs" fail (50% failure).
2453        // After sorting alphabetically the failures are always in positions 5-9.
2454        // The circuit breaker threshold is 20%, min-file guard is 5.
2455        // After processing f05 (6 total, 1 fail so far) rate=16% → no trip.
2456        // After processing f06 (7 total, 2 fail) rate=28% → trips.
2457
2458        let mut results: Vec<(String, bool)> = vec![
2459            ("a/f00.rs".to_string(), true),
2460            ("a/f01.rs".to_string(), true),
2461            ("a/f02.rs".to_string(), true),
2462            ("a/f03.rs".to_string(), true),
2463            ("a/f04.rs".to_string(), true),
2464            ("a/f05.rs".to_string(), false),
2465            ("a/f06.rs".to_string(), false),
2466            ("a/f07.rs".to_string(), false),
2467            ("a/f08.rs".to_string(), false),
2468            ("a/f09.rs".to_string(), false),
2469        ];
2470
2471        // Helper: run CB logic over a slice and return the path where it tripped.
2472        let run_cb = |items: &[(String, bool)]| -> Option<String> {
2473            let cb = CircuitBreakerState::new(0.20);
2474            for (path, ok) in items {
2475                if *ok {
2476                    cb.record_success();
2477                } else {
2478                    cb.record_failure(path, "parse error");
2479                }
2480                if cb.should_abort() {
2481                    return Some(path.clone());
2482                }
2483            }
2484            None
2485        };
2486
2487        // Sorted order → deterministic trip point.
2488        results.sort_by(|a, b| a.0.cmp(&b.0));
2489        let trip_sorted = run_cb(&results);
2490
2491        // Reversed order (simulates a different par_iter ordering).
2492        results.reverse();
2493        results.sort_by(|a, b| a.0.cmp(&b.0)); // sort again — same as before
2494        let trip_sorted2 = run_cb(&results);
2495
2496        // Both sorted runs must trip at the same file.
2497        assert_eq!(
2498            trip_sorted, trip_sorted2,
2499            "sorted runs must trip at the same path"
2500        );
2501        assert!(trip_sorted.is_some(), "circuit breaker should have tripped");
2502
2503        // Without sorting (reverse order): failures come first, CB trips earlier.
2504        let mut reversed: Vec<(String, bool)> = results.clone();
2505        reversed.reverse(); // failures first
2506        let trip_unsorted = run_cb(&reversed);
2507
2508        // The unsorted trip path differs from the sorted one, proving sort matters.
2509        // (Both will trip, but at different paths.)
2510        assert_ne!(
2511            trip_sorted, trip_unsorted,
2512            "unsorted order should trip at a different (earlier) path, proving sort is needed"
2513        );
2514    }
2515
2516    #[test]
2517    fn test_tier_counts() {
2518        use crate::domain::index::{AdmissionDecision, AdmissionTier, SkipReason, SkippedFile};
2519
2520        let mut index = make_empty_live_index();
2521        assert_eq!(index.tier_counts(), (0, 0, 0));
2522
2523        index.add_skipped_file(SkippedFile {
2524            path: "model.bin".into(),
2525            size: 1000,
2526            extension: Some("bin".into()),
2527            decision: AdmissionDecision::skip(
2528                AdmissionTier::MetadataOnly,
2529                SkipReason::DenylistedExtension,
2530            ),
2531        });
2532        index.add_skipped_file(SkippedFile {
2533            path: "huge.dat".into(),
2534            size: 200_000_000,
2535            extension: Some("dat".into()),
2536            decision: AdmissionDecision::skip(AdmissionTier::HardSkip, SkipReason::SizeCeiling),
2537        });
2538
2539        assert_eq!(index.tier_counts(), (0, 1, 1));
2540    }
2541}