1use std::collections::{HashMap, HashSet};
2use std::ops::{Deref, DerefMut};
3use std::path::Path;
4use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
5use std::sync::{Arc, Mutex};
6
7use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
8use std::time::{Duration, Instant, SystemTime};
9
10use rayon::prelude::*;
11use tracing::{error, info, warn};
12
13use super::query::RepoOutlineView;
14use crate::domain::ParseDiagnostic;
15use crate::domain::index::{AdmissionTier, SkippedFile};
16use crate::domain::{
17 FileClassification, FileOutcome, FileProcessingResult, LanguageId, ReferenceRecord,
18 SymbolRecord, find_enclosing_symbol,
19};
20use crate::{discovery, parsing};
21
/// Result of parsing one file. Serialized (serde) so it survives snapshots.
#[derive(Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ParseStatus {
    /// File parsed cleanly.
    Parsed,
    /// Parsed with a recoverable problem described by `warning`.
    PartialParse { warning: String },
    /// Parsing failed entirely; `error` describes why.
    Failed { error: String },
}
32
/// Fully-parsed, in-memory record for a single indexed source file.
#[derive(Clone, Debug)]
pub struct IndexedFile {
    /// Path relative to the index root.
    pub relative_path: String,
    /// Language the file was parsed as.
    pub language: LanguageId,
    /// Classification supplied by discovery/parsing.
    pub classification: FileClassification,
    /// Raw file bytes (also used to feed the trigram index).
    pub content: Vec<u8>,
    /// Symbols extracted by the parser.
    pub symbols: Vec<SymbolRecord>,
    /// Whether parsing succeeded, partially succeeded, or failed.
    pub parse_status: ParseStatus,
    /// Optional diagnostic emitted by the parser.
    pub parse_diagnostic: Option<ParseDiagnostic>,
    /// File length in bytes, as reported by the parse result.
    pub byte_len: u64,
    /// Hash of the file content (algorithm defined by the parsing layer).
    pub content_hash: String,
    /// Outgoing references; definition-site self-references are filtered
    /// out in `from_parse_result`.
    pub references: Vec<ReferenceRecord>,
    /// Alias resolution map produced by the parser.
    pub alias_map: HashMap<String, String>,
    /// File mtime in seconds since the Unix epoch; 0 when unknown
    /// (set via `with_mtime`).
    pub mtime_secs: u64,
}
56
/// Position of one reference: the file it lives in plus its index into that
/// file's `references` vector (see `build_reverse_index_from_files`).
#[derive(Clone, Debug)]
pub struct ReferenceLocation {
    /// Relative path of the file containing the reference.
    pub file_path: String,
    /// Index into that file's `IndexedFile::references`.
    pub reference_idx: u32,
}
66
67impl IndexedFile {
68 pub fn from_parse_result(result: FileProcessingResult, content: Vec<u8>) -> Self {
69 let parse_status = match &result.outcome {
70 FileOutcome::Processed => ParseStatus::Parsed,
71 FileOutcome::PartialParse { warning } => ParseStatus::PartialParse {
72 warning: warning.clone(),
73 },
74 FileOutcome::Failed { error } => ParseStatus::Failed {
75 error: error.clone(),
76 },
77 };
78
79 let FileProcessingResult {
81 relative_path,
82 language,
83 classification,
84 outcome: _,
85 parse_diagnostic,
86 symbols,
87 byte_len,
88 content_hash,
89 references: raw_references,
90 alias_map,
91 } = result;
92
93 let symbol_byte_ranges: std::collections::HashSet<(u32, u32)> =
97 symbols.iter().map(|s| s.byte_range).collect();
98
99 let references: Vec<ReferenceRecord> = raw_references
101 .into_iter()
102 .filter(|r| !symbol_byte_ranges.contains(&r.byte_range))
103 .map(|mut r| {
104 if r.enclosing_symbol_index.is_none() {
105 r.enclosing_symbol_index = find_enclosing_symbol(&symbols, r.line_range.0);
106 }
107 r
108 })
109 .collect();
110
111 IndexedFile {
112 relative_path,
113 language,
114 classification,
115 content,
116 symbols,
117 parse_status,
118 parse_diagnostic,
119 byte_len,
120 content_hash,
121 references,
122 alias_map,
123 mtime_secs: 0,
124 }
125 }
126
127 pub fn with_mtime(mut self, mtime_secs: u64) -> Self {
130 self.mtime_secs = mtime_secs;
131 self
132 }
133}
134
/// Identity `AsRef`, letting APIs accept `impl AsRef<IndexedFile>` uniformly
/// for both owned values and references.
impl AsRef<IndexedFile> for IndexedFile {
    fn as_ref(&self) -> &IndexedFile {
        self
    }
}
140
/// Tracks parse success/failure counts during a load and trips once the
/// failure rate crosses a configurable threshold, so a catastrophically
/// broken tree does not silently produce a mostly-empty index.
pub struct CircuitBreakerState {
    /// Total files observed (successes + failures).
    total: AtomicUsize,
    /// Files whose parse failed outright.
    failed: AtomicUsize,
    /// Latched once `should_abort` observed a rate above the threshold.
    tripped: AtomicBool,
    /// Failure-rate threshold in [0, 1].
    threshold: f64,
    /// First few (path, reason) pairs, kept for the human-readable summary.
    failure_details: Mutex<Vec<(String, String)>>,
}

impl CircuitBreakerState {
    /// Fresh breaker with the given failure-rate threshold.
    pub fn new(threshold: f64) -> Self {
        CircuitBreakerState {
            total: AtomicUsize::new(0),
            failed: AtomicUsize::new(0),
            tripped: AtomicBool::new(false),
            threshold,
            failure_details: Mutex::new(Vec::new()),
        }
    }

    /// Read the threshold from `SYMFORGE_CB_THRESHOLD`, defaulting to 20%.
    pub fn from_env() -> Self {
        let configured = std::env::var("SYMFORGE_CB_THRESHOLD")
            .ok()
            .and_then(|raw| raw.parse::<f64>().ok());
        Self::new(configured.unwrap_or(0.20))
    }

    /// Count one successfully parsed file.
    pub fn record_success(&self) {
        self.total.fetch_add(1, Ordering::Relaxed);
    }

    /// Count one failed file, retaining the first five (path, reason) pairs.
    pub fn record_failure(&self, path: &str, reason: &str) {
        self.total.fetch_add(1, Ordering::Relaxed);
        self.failed.fetch_add(1, Ordering::Relaxed);

        let mut details = self.failure_details.lock().unwrap();
        if details.len() < 5 {
            details.push((path.to_owned(), reason.to_owned()));
        }
    }

    /// True when the failure rate exceeds the threshold (after a minimum of
    /// five observations); latches `tripped` as a side effect.
    pub fn should_abort(&self) -> bool {
        let total = self.total.load(Ordering::Relaxed);
        if total < 5 {
            // Too few samples to judge a rate meaningfully.
            return false;
        }
        let failed = self.failed.load(Ordering::Relaxed);
        let over_threshold = (failed as f64 / total as f64) > self.threshold;
        if over_threshold {
            self.tripped.store(true, Ordering::Relaxed);
        }
        over_threshold
    }

    /// Whether `should_abort` has ever reported an over-threshold rate.
    pub fn is_tripped(&self) -> bool {
        self.tripped.load(Ordering::Relaxed)
    }

    /// Human-readable report: counts, percentage, and up to three failures.
    pub fn summary(&self) -> String {
        let total = self.total.load(Ordering::Relaxed);
        let failed = self.failed.load(Ordering::Relaxed);
        let rate = match total {
            0 => 0,
            _ => (failed as f64 / total as f64 * 100.0) as u32,
        };

        let mut msg = format!(
            "circuit breaker tripped: {failed}/{total} files failed ({rate}% > {}%)",
            (self.threshold * 100.0) as u32
        );

        let details = self.failure_details.lock().unwrap();
        if !details.is_empty() {
            let bullet_lines: Vec<String> = details
                .iter()
                .take(3)
                .map(|(p, r)| format!("  - {p}: {r}"))
                .collect();
            msg.push_str("\nTop failures:\n");
            msg.push_str(&bullet_lines.join("\n"));
        }
        msg
    }
}
238
/// Coarse lifecycle state of the index (computed by `index_state`, defined
/// elsewhere), mapped to `PublishedIndexStatus` when publishing.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IndexState {
    /// No files loaded yet.
    Empty,
    /// A load is underway.
    Loading,
    /// Loaded and serving.
    Ready,
    /// Too many parse failures during load; `summary` is the
    /// human-readable circuit-breaker report.
    CircuitBreakerTripped {
        summary: String,
    },
}
250
/// How the current in-memory index came to exist.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IndexLoadSource {
    /// Started from `LiveIndex::empty()`.
    EmptyBootstrap,
    /// Built by scanning and parsing the tree (`load`/reload paths).
    FreshLoad,
    /// Restored from a persisted snapshot.
    SnapshotRestore,
}
258
/// Progress of verifying a snapshot-restored index. Only advanced when
/// `load_source == SnapshotRestore` (see the `mark_snapshot_verify_*` methods).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SnapshotVerifyState {
    /// Index was not restored from a snapshot; nothing to verify.
    NotNeeded,
    /// Verification has not started yet.
    Pending,
    /// Verification is in progress.
    Running,
    /// Verification finished.
    Completed,
}
267
/// Published (consumer-facing) status, derived from `IndexState` in
/// `PublishedIndexState::capture`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum PublishedIndexStatus {
    /// No files indexed.
    Empty,
    /// Load in progress.
    Loading,
    /// Serving normally.
    Ready,
    /// Circuit breaker tripped; see `degraded_summary`.
    Degraded,
}
276
/// Immutable snapshot of index health, swapped atomically after every
/// mutation so readers never need the `LiveIndex` write lock.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PublishedIndexState {
    /// Monotonic publish counter (from `SharedIndexHandle::next_generation`).
    pub generation: u64,
    /// Consumer-facing status.
    pub status: PublishedIndexStatus,
    /// Circuit-breaker summary when `status` is `Degraded`, else `None`.
    pub degraded_summary: Option<String>,
    /// Number of indexed files.
    pub file_count: usize,
    /// Files that parsed cleanly.
    pub parsed_count: usize,
    /// Files that parsed with warnings.
    pub partial_parse_count: usize,
    /// Files whose parse failed.
    pub failed_count: usize,
    /// Total symbols across all files.
    pub symbol_count: usize,
    /// Wall-clock time of the last load/mutation.
    pub loaded_at_system: SystemTime,
    /// Duration of the last full load.
    pub load_duration: Duration,
    /// Origin of the current index.
    pub load_source: IndexLoadSource,
    /// Snapshot verification progress.
    pub snapshot_verify_state: SnapshotVerifyState,
    /// True only for a still-empty bootstrap index.
    pub is_empty: bool,
    /// (indexed, metadata-only, hard-skipped) counts — see `tier_counts`.
    pub tier_counts: (usize, usize, usize),
}
296
/// The mutable, authoritative in-memory index plus every derived lookup
/// structure. All derived indices are recomputable from `files` alone
/// (see `repair_file_indices` and `DerivedIndices`).
pub struct LiveIndex {
    /// Primary map: relative path -> parsed file.
    pub(crate) files: HashMap<String, Arc<IndexedFile>>,
    /// Monotonic timestamp of the last load (for elapsed-time measurement).
    pub(crate) loaded_at: Instant,
    /// Wall-clock timestamp; refreshed on every file mutation.
    pub(crate) loaded_at_system: SystemTime,
    /// Duration of the last full load.
    pub(crate) load_duration: Duration,
    /// Parse-failure circuit breaker populated during load.
    pub(crate) cb_state: CircuitBreakerState,
    /// True only for the `empty()` bootstrap until a file is added.
    pub(crate) is_empty: bool,
    /// Origin of this index (fresh load, snapshot restore, bootstrap).
    pub(crate) load_source: IndexLoadSource,
    /// Post-restore verification progress.
    pub(crate) snapshot_verify_state: SnapshotVerifyState,
    /// Reference name -> every location that uses the name.
    pub(crate) reverse_index: HashMap<String, Vec<ReferenceLocation>>,
    /// Lowercased basename -> sorted, deduped paths with that basename.
    pub(crate) files_by_basename: HashMap<String, Vec<String>>,
    /// Lowercased directory component -> sorted, deduped paths under it.
    pub(crate) files_by_dir_component: HashMap<String, Vec<String>>,
    /// Trigram index over raw file contents.
    pub(crate) trigram_index: super::trigram::TrigramIndex,
    /// Parsed gitignore rules for the root, when present.
    pub(crate) gitignore: Option<ignore::gitignore::Gitignore>,
    /// Files the admission gate declined to parse.
    pub(crate) skipped_files: Vec<SkippedFile>,
}
328
/// Lightweight copy of a symbol captured just before `update_file` replaces a
/// file, so callers can diff symbols across an edit
/// (see `SharedIndexHandle::take_pre_update_symbols`).
#[derive(Clone, Debug)]
pub struct PreUpdateSymbol {
    /// Symbol name.
    pub name: String,
    /// Stringified `SymbolKind` (via `to_string`).
    pub kind: String,
    /// (start, end) line numbers.
    pub line_range: (u32, u32),
    /// (start, end) byte offsets.
    pub byte_range: (u32, u32),
}
340
/// Thread-safe owner of the [`LiveIndex`] plus cheap-to-read published
/// snapshots that are regenerated after every mutation (`publish_locked`).
pub struct SharedIndexHandle {
    /// The authoritative index behind a parking_lot `RwLock`.
    live: RwLock<LiveIndex>,
    /// Latest immutable state snapshot; readers clone the `Arc`.
    published_state: RwLock<Arc<PublishedIndexState>>,
    /// Latest immutable repo outline snapshot.
    published_repo_outline: RwLock<Arc<RepoOutlineView>>,
    /// Monotonic generation counter for published snapshots.
    next_generation: AtomicU64,
    /// Git temporal data, replaced wholesale by `update_git_temporal`.
    git_temporal: RwLock<Arc<super::git_temporal::GitTemporalIndex>>,
    /// Per-path symbol snapshots taken just before `update_file` replaces a file.
    pre_update_symbols: RwLock<HashMap<String, Vec<PreUpdateSymbol>>>,
}
373
/// Write guard that republishes the derived snapshots on drop if the index
/// was mutably dereferenced while held (see the `DerefMut`/`Drop` impls).
pub struct SharedIndexWriteGuard<'a> {
    handle: &'a SharedIndexHandle,
    guard: RwLockWriteGuard<'a, LiveIndex>,
    // Set by DerefMut; triggers publish_locked in Drop.
    dirty: bool,
}
380
381impl SharedIndexHandle {
382 pub fn new(index: LiveIndex) -> Self {
383 let published_state = Arc::new(PublishedIndexState::capture(0, &index));
384 let published_repo_outline = Arc::new(index.capture_repo_outline_view());
385 Self {
386 live: RwLock::new(index),
387 published_state: RwLock::new(published_state),
388 published_repo_outline: RwLock::new(published_repo_outline),
389 next_generation: AtomicU64::new(1),
390 git_temporal: RwLock::new(Arc::new(super::git_temporal::GitTemporalIndex::pending())),
391 pre_update_symbols: RwLock::new(HashMap::new()),
392 }
393 }
394
395 pub fn shared(index: LiveIndex) -> Arc<Self> {
396 Arc::new(Self::new(index))
397 }
398
399 pub fn read(&self) -> RwLockReadGuard<'_, LiveIndex> {
400 self.live.read()
401 }
402
403 pub fn write(&self) -> SharedIndexWriteGuard<'_> {
404 SharedIndexWriteGuard {
405 handle: self,
406 guard: self.live.write(),
407 dirty: false,
408 }
409 }
410
411 pub fn published_state(&self) -> Arc<PublishedIndexState> {
412 self.published_state.read().clone()
413 }
414
415 pub fn published_repo_outline(&self) -> Arc<RepoOutlineView> {
416 self.published_repo_outline.read().clone()
417 }
418
419 pub fn reload(&self, root: &Path) -> anyhow::Result<()> {
420 let data = LiveIndex::build_reload_data(root)?;
424 let mut live = self.live.write();
425 live.apply_reload_data(data);
426 self.publish_locked(&live);
427 Ok(())
428 }
429
430 pub fn update_file(&self, path: String, file: IndexedFile) {
431 let mut live = self.live.write();
432 if let Some(existing) = live.get_file(&path) {
435 let snapshot: Vec<PreUpdateSymbol> = existing
436 .symbols
437 .iter()
438 .map(|s| PreUpdateSymbol {
439 name: s.name.clone(),
440 kind: s.kind.to_string(),
441 line_range: s.line_range,
442 byte_range: s.byte_range,
443 })
444 .collect();
445 self.pre_update_symbols
446 .write()
447 .insert(path.clone(), snapshot);
448 }
449 let path_clone = path.clone();
450 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
451 live.update_file(path, file);
452 }));
453 if let Err(panic_info) = result {
454 let msg = panic_info
455 .downcast_ref::<String>()
456 .map(|s| s.as_str())
457 .or_else(|| panic_info.downcast_ref::<&str>().copied())
458 .unwrap_or("unknown");
459 tracing::error!(
460 "index mutation panicked for '{}': {} — repairing",
461 path_clone,
462 msg
463 );
464 live.repair_file_indices(&path_clone);
465 }
466 self.publish_locked(&live);
467 }
468
469 pub fn add_file(&self, path: String, file: IndexedFile) {
470 let mut live = self.live.write();
471 let path_clone = path.clone();
472 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
473 live.add_file(path, file);
474 }));
475 if let Err(panic_info) = result {
476 let msg = panic_info
477 .downcast_ref::<String>()
478 .map(|s| s.as_str())
479 .or_else(|| panic_info.downcast_ref::<&str>().copied())
480 .unwrap_or("unknown");
481 tracing::error!(
482 "index add panicked for '{}': {} — repairing",
483 path_clone,
484 msg
485 );
486 live.repair_file_indices(&path_clone);
487 }
488 self.publish_locked(&live);
489 }
490
491 pub fn remove_file(&self, path: &str) {
492 let mut live = self.live.write();
493 let path_owned = path.to_string();
494 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
495 live.remove_file(path);
496 }));
497 if let Err(panic_info) = result {
498 let msg = panic_info
499 .downcast_ref::<String>()
500 .map(|s| s.as_str())
501 .or_else(|| panic_info.downcast_ref::<&str>().copied())
502 .unwrap_or("unknown");
503 tracing::error!(
504 "index remove panicked for '{}': {} — repairing",
505 path_owned,
506 msg
507 );
508 live.repair_file_indices(&path_owned);
509 }
510 self.publish_locked(&live);
511 }
512
513 pub fn mark_snapshot_verify_running(&self) {
514 let mut live = self.live.write();
515 live.mark_snapshot_verify_running();
516 self.publish_locked(&live);
517 }
518
519 pub fn mark_snapshot_verify_completed(&self) {
520 let mut live = self.live.write();
521 live.mark_snapshot_verify_completed();
522 self.publish_locked(&live);
523 }
524
525 fn publish_locked(&self, live: &LiveIndex) {
526 let generation = self.next_generation.fetch_add(1, Ordering::Relaxed);
527 let published_state = Arc::new(PublishedIndexState::capture(generation, live));
528 let published_repo_outline = Arc::new(live.capture_repo_outline_view());
529 *self.published_state.write() = published_state;
530 *self.published_repo_outline.write() = published_repo_outline;
531 }
532
533 pub fn git_temporal(&self) -> Arc<super::git_temporal::GitTemporalIndex> {
535 self.git_temporal.read().clone()
536 }
537
538 pub fn take_pre_update_symbols(&self, path: &str) -> Option<Vec<PreUpdateSymbol>> {
544 self.pre_update_symbols.write().remove(path)
545 }
546
547 pub fn update_git_temporal(&self, index: super::git_temporal::GitTemporalIndex) {
549 *self.git_temporal.write() = Arc::new(index);
550 }
551}
552
/// Read access through the guard does not mark it dirty.
impl<'a> Deref for SharedIndexWriteGuard<'a> {
    type Target = LiveIndex;

    fn deref(&self) -> &Self::Target {
        &self.guard
    }
}
560
/// Any mutable access marks the guard dirty, so `Drop` republishes the
/// derived state even if the caller never actually changed anything.
impl DerefMut for SharedIndexWriteGuard<'_> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.dirty = true;
        &mut self.guard
    }
}
567
/// Republish on drop when the guard was used mutably; the write lock is
/// still held here, so the publish sees a consistent index.
impl Drop for SharedIndexWriteGuard<'_> {
    fn drop(&mut self) {
        if self.dirty {
            self.handle.publish_locked(&self.guard);
        }
    }
}
575
/// Cheaply clonable, thread-shared handle to the index.
pub type SharedIndex = Arc<SharedIndexHandle>;
578
579impl PublishedIndexState {
580 fn capture(generation: u64, index: &LiveIndex) -> Self {
581 let (status, degraded_summary) = match index.index_state() {
582 IndexState::Empty => (PublishedIndexStatus::Empty, None),
583 IndexState::Loading => (PublishedIndexStatus::Loading, None),
584 IndexState::Ready => (PublishedIndexStatus::Ready, None),
585 IndexState::CircuitBreakerTripped { summary } => {
586 (PublishedIndexStatus::Degraded, Some(summary))
587 }
588 };
589 let stats = index.health_stats();
590 Self {
591 generation,
592 status,
593 degraded_summary,
594 file_count: stats.file_count,
595 parsed_count: stats.parsed_count,
596 partial_parse_count: stats.partial_parse_count,
597 failed_count: stats.failed_count,
598 symbol_count: stats.symbol_count,
599 loaded_at_system: index.loaded_at_system,
600 load_duration: stats.load_duration,
601 load_source: index.load_source,
602 snapshot_verify_state: index.snapshot_verify_state,
603 is_empty: index.is_empty,
604 tier_counts: stats.tier_counts,
605 }
606 }
607
608 pub fn status_label(&self) -> &'static str {
609 match self.status {
610 PublishedIndexStatus::Empty => "Empty",
611 PublishedIndexStatus::Loading => "Loading",
612 PublishedIndexStatus::Ready => "Ready",
613 PublishedIndexStatus::Degraded => "Degraded",
614 }
615 }
616}
617
/// Secondary lookup structures derived entirely from the primary file map;
/// built off-lock during reloads and swapped in atomically.
pub(crate) struct DerivedIndices {
    /// Trigram index over file contents.
    pub trigram_index: super::trigram::TrigramIndex,
    /// Reference name -> locations using it.
    pub reverse_index: HashMap<String, Vec<ReferenceLocation>>,
    /// Lowercased basename -> sorted paths.
    pub files_by_basename: HashMap<String, Vec<String>>,
    /// Lowercased directory component -> sorted paths.
    pub files_by_dir_component: HashMap<String, Vec<String>>,
}
627
628impl DerivedIndices {
629 pub(crate) fn build_from_files(files: &HashMap<String, Arc<IndexedFile>>) -> Self {
632 let (files_by_basename, files_by_dir_component) = build_path_indices_from_files(files);
633 Self {
634 trigram_index: super::trigram::TrigramIndex::build_from_files(files),
635 reverse_index: build_reverse_index_from_files(files),
636 files_by_basename,
637 files_by_dir_component,
638 }
639 }
640}
641
/// Everything computed off-lock by `build_reload_data`, applied in one step
/// by `apply_reload_data` while the write lock is held.
pub(crate) struct ReloadData {
    /// Freshly parsed file map.
    pub files: HashMap<String, Arc<IndexedFile>>,
    /// Circuit breaker populated during the reload parse.
    pub cb_state: CircuitBreakerState,
    /// How long discovery + parsing took.
    pub load_duration: Duration,
    /// Gitignore reloaded from the root.
    pub gitignore: Option<ignore::gitignore::Gitignore>,
    /// Rebuilt secondary indices.
    pub derived: DerivedIndices,
    /// Always empty on this path: the reload flow performs no admission
    /// gating (see `build_reload_data`).
    pub skipped_files: Vec<SkippedFile>,
}
661
662pub(crate) fn build_reverse_index_from_files(
664 files: &HashMap<String, Arc<IndexedFile>>,
665) -> HashMap<String, Vec<ReferenceLocation>> {
666 let mut idx: HashMap<String, Vec<ReferenceLocation>> = HashMap::new();
667 for (file_path, indexed_file) in files {
668 for (reference_idx, reference) in indexed_file.references.iter().enumerate() {
669 idx.entry(reference.name.clone())
670 .or_default()
671 .push(ReferenceLocation {
672 file_path: file_path.clone(),
673 reference_idx: reference_idx as u32,
674 });
675 }
676 }
677 idx
678}
679
680pub(crate) fn build_path_indices_from_files(
682 files: &HashMap<String, Arc<IndexedFile>>,
683) -> (HashMap<String, Vec<String>>, HashMap<String, Vec<String>>) {
684 let mut by_basename: HashMap<String, Vec<String>> = HashMap::new();
685 let mut by_dir_component: HashMap<String, Vec<String>> = HashMap::new();
686 for path in files.keys() {
687 if let Some(basename) = basename_key(path) {
688 insert_sorted_unique(by_basename.entry(basename).or_default(), path);
689 }
690 for component in dir_component_keys(path) {
691 insert_sorted_unique(by_dir_component.entry(component).or_default(), path);
692 }
693 }
694 (by_basename, by_dir_component)
695}
696
impl LiveIndex {
    /// Discover, admit, parse, and index every file under `root`, returning a
    /// ready-to-use shared handle. Admission and parsing run in parallel
    /// (rayon); the circuit breaker aborts indexing early if too many files
    /// fail to parse.
    pub fn load(root: &Path) -> anyhow::Result<SharedIndex> {
        let start = Instant::now();

        info!("LiveIndex::load starting at {:?}", root);

        let all_entries = discovery::discover_all_files(root)?;
        info!(
            "discovered {} total files (pre-admission)",
            all_entries.len()
        );

        use crate::discovery::classify_admission;
        use crate::domain::index::{AdmissionTier, SkippedFile};

        // Per-entry admission decision: either parse the file or record why
        // it was skipped.
        enum AdmissionOutcome {
            Parse {
                relative_path: String,
                language: crate::domain::LanguageId,
                classification: crate::domain::FileClassification,
                bytes: Vec<u8>,
                mtime_secs: u64,
            },
            Skip(SkippedFile),
        }

        let outcomes: Vec<AdmissionOutcome> = all_entries
            .par_iter()
            .filter_map(|entry| {
                // First pass: decide from path + size alone, before reading
                // any content.
                let decision_pre = classify_admission(
                    &entry.absolute_path,
                    entry.file_size,
                    None, );

                match decision_pre.tier {
                    AdmissionTier::HardSkip | AdmissionTier::MetadataOnly => {
                        let sf = SkippedFile {
                            path: entry.relative_path.clone(),
                            size: entry.file_size,
                            extension: entry
                                .absolute_path
                                .extension()
                                .and_then(|e| e.to_str())
                                .map(|s| s.to_string()),
                            decision: decision_pre,
                        };
                        return Some(AdmissionOutcome::Skip(sf));
                    }
                    AdmissionTier::Normal => {}
                }

                let language = match &entry.language {
                    Some(lang) => lang.clone(),
                    None => {
                        // No detected language: read the content only to
                        // attach a content-aware admission decision to the
                        // skip record.
                        let bytes = match std::fs::read(&entry.absolute_path) {
                            Ok(b) => b,
                            Err(e) => {
                                warn!("failed to read {:?}: {}", entry.absolute_path, e);
                                return None;
                            }
                        };
                        let decision_post =
                            classify_admission(&entry.absolute_path, entry.file_size, Some(&bytes));
                        // NOTE(review): files with no detected language are
                        // always skipped here, even when `decision_post.tier`
                        // is Normal — confirm this is intended.
                        let sf = SkippedFile {
                            path: entry.relative_path.clone(),
                            size: entry.file_size,
                            extension: entry
                                .absolute_path
                                .extension()
                                .and_then(|e| e.to_str())
                                .map(|s| s.to_string()),
                            decision: decision_post,
                        };
                        return Some(AdmissionOutcome::Skip(sf));
                    }
                };

                let bytes = match std::fs::read(&entry.absolute_path) {
                    Ok(b) => b,
                    Err(e) => {
                        warn!("failed to read {:?}: {}", entry.absolute_path, e);
                        return None;
                    }
                };
                // mtime falls back to 0 on any metadata/clock failure.
                let mtime_secs = std::fs::metadata(&entry.absolute_path)
                    .and_then(|m| m.modified())
                    .ok()
                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
                    .map(|d| d.as_secs())
                    .unwrap_or(0);

                // Second pass: re-classify now that the content is available.
                let decision_post =
                    classify_admission(&entry.absolute_path, entry.file_size, Some(&bytes));

                match decision_post.tier {
                    AdmissionTier::HardSkip | AdmissionTier::MetadataOnly => {
                        let sf = SkippedFile {
                            path: entry.relative_path.clone(),
                            size: entry.file_size,
                            extension: entry
                                .absolute_path
                                .extension()
                                .and_then(|e| e.to_str())
                                .map(|s| s.to_string()),
                            decision: decision_post,
                        };
                        Some(AdmissionOutcome::Skip(sf))
                    }
                    AdmissionTier::Normal => Some(AdmissionOutcome::Parse {
                        relative_path: entry.relative_path.clone(),
                        language,
                        classification: entry.classification,
                        bytes,
                        mtime_secs,
                    }),
                }
            })
            .collect();

        // Partition outcomes into the skip list and the parse work queue.
        let mut skipped_files: Vec<SkippedFile> = Vec::new();
        let mut to_parse: Vec<(
            String,
            crate::domain::LanguageId,
            crate::domain::FileClassification,
            Vec<u8>,
            u64, )> = Vec::new();

        for outcome in outcomes {
            match outcome {
                AdmissionOutcome::Skip(sf) => skipped_files.push(sf),
                AdmissionOutcome::Parse {
                    relative_path,
                    language,
                    classification,
                    bytes,
                    mtime_secs,
                } => {
                    to_parse.push((relative_path, language, classification, bytes, mtime_secs));
                }
            }
        }

        info!(
            "admission gate: {} to parse, {} skipped",
            to_parse.len(),
            skipped_files.len()
        );

        // Parse all admitted files in parallel.
        let mut parse_results: Vec<(String, IndexedFile)> = to_parse
            .par_iter()
            .map(
                |(relative_path, language, classification, bytes, mtime_secs)| {
                    let result = parsing::process_file_with_classification(
                        relative_path,
                        bytes,
                        language.clone(),
                        *classification,
                    );
                    let indexed = IndexedFile::from_parse_result(result, bytes.clone())
                        .with_mtime(*mtime_secs);
                    (relative_path.clone(), indexed)
                },
            )
            .collect();

        // Sort by path so the circuit-breaker scan below is deterministic.
        parse_results.sort_by(|a, b| a.0.cmp(&b.0));

        let cb_state = CircuitBreakerState::from_env();
        let mut files: HashMap<String, Arc<IndexedFile>> =
            HashMap::with_capacity(parse_results.len());

        let mut cb_tripped = false;
        for (path, indexed_file) in parse_results {
            match &indexed_file.parse_status {
                ParseStatus::Failed { error } => {
                    cb_state.record_failure(&path, error);
                }
                _ => {
                    cb_state.record_success();
                }
            }

            if cb_state.should_abort() {
                // Keep the file that tipped the scale, then stop indexing;
                // remaining parse results are dropped.
                let summary = cb_state.summary();
                error!("{}", summary);
                cb_tripped = true;
                files.insert(path, Arc::new(indexed_file));
                break;
            }

            files.insert(path, Arc::new(indexed_file));
        }

        if cb_tripped {
            // NOTE(review): should_abort() already latched `tripped`; this
            // store is redundant but harmless.
            cb_state.tripped.store(true, Ordering::Relaxed);
        }

        let load_duration = start.elapsed();
        info!(
            "LiveIndex loaded: {} files, {} symbols, {} skipped, {:?}",
            files.len(),
            files.values().map(|f| f.symbols.len()).sum::<usize>(),
            skipped_files.len(),
            load_duration
        );

        let trigram_index = super::trigram::TrigramIndex::build_from_files(&files);
        let gitignore = discovery::load_gitignore(root);

        let mut index = LiveIndex {
            files,
            loaded_at: Instant::now(),
            loaded_at_system: SystemTime::now(),
            load_duration,
            cb_state,
            is_empty: false,
            load_source: IndexLoadSource::FreshLoad,
            snapshot_verify_state: SnapshotVerifyState::NotNeeded,
            reverse_index: HashMap::new(),
            files_by_basename: HashMap::new(),
            files_by_dir_component: HashMap::new(),
            trigram_index,
            gitignore,
            skipped_files,
        };
        // Derived lookups are filled in after construction.
        index.rebuild_reverse_index();
        index.rebuild_path_indices();

        Ok(SharedIndexHandle::shared(index))
    }

    /// Bootstrap an empty index (no disk access). `is_empty` stays true
    /// until the first file mutation.
    pub fn empty() -> SharedIndex {
        let index = LiveIndex {
            files: HashMap::new(),
            loaded_at: Instant::now(),
            loaded_at_system: SystemTime::now(),
            load_duration: Duration::ZERO,
            // Same 20% default threshold as CircuitBreakerState::from_env.
            cb_state: CircuitBreakerState::new(0.20),
            is_empty: true,
            load_source: IndexLoadSource::EmptyBootstrap,
            snapshot_verify_state: SnapshotVerifyState::NotNeeded,
            reverse_index: HashMap::new(),
            files_by_basename: HashMap::new(),
            files_by_dir_component: HashMap::new(),
            trigram_index: super::trigram::TrigramIndex::new(),
            gitignore: None,
            skipped_files: Vec::new(),
        };
        SharedIndexHandle::shared(index)
    }

    /// Record a file that the admission gate declined to parse.
    pub fn add_skipped_file(&mut self, sf: SkippedFile) {
        self.skipped_files.push(sf);
    }

    /// All skipped-file records from admission gating.
    pub fn skipped_files(&self) -> &[SkippedFile] {
        &self.skipped_files
    }

    /// Counts per admission tier:
    /// (fully indexed, metadata-only, hard-skipped).
    pub fn tier_counts(&self) -> (usize, usize, usize) {
        let tier1 = self.files.len();
        let mut tier2 = 0;
        let mut tier3 = 0;
        for sf in &self.skipped_files {
            match sf.tier() {
                AdmissionTier::MetadataOnly => tier2 += 1,
                AdmissionTier::HardSkip => tier3 += 1,
                AdmissionTier::Normal => {} }
        }
        (tier1, tier2, tier3)
    }

    /// Discover and parse everything under `root` WITHOUT holding any lock,
    /// producing a `ReloadData` that `apply_reload_data` can swap in quickly.
    /// Unlike `load`, this path performs no admission gating and does not
    /// sort results before the circuit-breaker scan.
    pub(crate) fn build_reload_data(root: &Path) -> anyhow::Result<ReloadData> {
        let start = Instant::now();

        info!("LiveIndex::build_reload_data starting at {:?}", root);

        if !root.exists() {
            anyhow::bail!(
                "discovery error: root path does not exist: {}",
                root.display()
            );
        }

        let discovered = discovery::discover_files(root)?;
        info!("discovered {} source files", discovered.len());

        let parse_results: Vec<(String, IndexedFile)> = discovered
            .par_iter()
            .filter_map(|df| {
                let bytes = match std::fs::read(&df.absolute_path) {
                    Ok(b) => b,
                    Err(e) => {
                        warn!("failed to read {:?}: {}", df.absolute_path, e);
                        return None;
                    }
                };

                // mtime falls back to 0 on any metadata/clock failure.
                let mtime_secs = std::fs::metadata(&df.absolute_path)
                    .and_then(|m| m.modified())
                    .ok()
                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
                    .map(|d| d.as_secs())
                    .unwrap_or(0);

                let result = parsing::process_file_with_classification(
                    &df.relative_path,
                    &bytes,
                    df.language.clone(),
                    df.classification,
                );
                let indexed = IndexedFile::from_parse_result(result, bytes).with_mtime(mtime_secs);
                Some((df.relative_path.clone(), indexed))
            })
            .collect();

        let new_cb = CircuitBreakerState::from_env();
        let mut new_files: HashMap<String, Arc<IndexedFile>> =
            HashMap::with_capacity(parse_results.len());

        let mut cb_tripped = false;
        for (path, indexed_file) in parse_results {
            match &indexed_file.parse_status {
                ParseStatus::Failed { error } => {
                    new_cb.record_failure(&path, error);
                }
                _ => {
                    new_cb.record_success();
                }
            }

            if new_cb.should_abort() {
                // Keep the tipping file, then stop; remaining results dropped.
                let summary = new_cb.summary();
                error!("{}", summary);
                cb_tripped = true;
                new_files.insert(path, Arc::new(indexed_file));
                break;
            }

            new_files.insert(path, Arc::new(indexed_file));
        }

        if cb_tripped {
            // NOTE(review): should_abort() already latched `tripped`;
            // redundant but harmless.
            new_cb.tripped.store(true, Ordering::Relaxed);
        }

        let load_duration = start.elapsed();
        info!(
            "LiveIndex::build_reload_data done: {} files, {} symbols, {:?}",
            new_files.len(),
            new_files.values().map(|f| f.symbols.len()).sum::<usize>(),
            load_duration
        );

        // Derived indices are also built off-lock.
        let derived = DerivedIndices::build_from_files(&new_files);

        Ok(ReloadData {
            files: new_files,
            cb_state: new_cb,
            load_duration,
            gitignore: discovery::load_gitignore(root),
            derived,
            skipped_files: Vec::new(),
        })
    }

    /// Swap pre-built reload results into this index. Cheap: pure field
    /// assignments, intended to run while the write lock is held.
    pub(crate) fn apply_reload_data(&mut self, data: ReloadData) {
        self.files = data.files;
        self.loaded_at = Instant::now();
        self.loaded_at_system = SystemTime::now();
        self.load_duration = data.load_duration;
        self.cb_state = data.cb_state;
        self.is_empty = false;
        self.load_source = IndexLoadSource::FreshLoad;
        self.snapshot_verify_state = SnapshotVerifyState::NotNeeded;
        self.trigram_index = data.derived.trigram_index;
        self.reverse_index = data.derived.reverse_index;
        self.files_by_basename = data.derived.files_by_basename;
        self.files_by_dir_component = data.derived.files_by_dir_component;
        self.gitignore = data.gitignore;
        self.skipped_files = data.skipped_files;
    }

    /// Full in-place reload: rebuild from disk, then apply.
    pub fn reload(&mut self, root: &Path) -> anyhow::Result<()> {
        let data = Self::build_reload_data(root)?;
        self.apply_reload_data(data);
        Ok(())
    }

    /// Insert or replace `path`, keeping every derived index consistent:
    /// stale reverse-index and path-index entries for the old version are
    /// removed before the new version's entries are inserted.
    pub fn update_file(&mut self, path: String, file: IndexedFile) {
        // Capture the outgoing reference names of the old version (if any)
        // so their reverse-index entries can be retired below.
        let old_ref_names: Vec<String> = self
            .files
            .get(&path)
            .map(|f| f.references.iter().map(|r| r.name.clone()).collect())
            .unwrap_or_default();
        let had_existing = !old_ref_names.is_empty() || self.files.contains_key(&path);

        self.files.insert(path.clone(), Arc::new(file));

        if had_existing {
            self.remove_path_indices_for_path(&path);
        }
        // Retire the old version's reverse-index entries.
        for name in &old_ref_names {
            if let Some(locs) = self.reverse_index.get_mut(name) {
                locs.retain(|loc| loc.file_path != path);
                if locs.is_empty() {
                    self.reverse_index.remove(name);
                }
            }
        }
        self.trigram_index
            .update_file(&path, &self.files[&path].content);
        self.insert_reverse_index_for_path(&path);
        self.insert_path_indices_for_path(&path);
        self.is_empty = false;
        self.loaded_at_system = SystemTime::now();
    }

    /// Adding is identical to updating (insert-or-replace semantics).
    pub fn add_file(&mut self, path: String, file: IndexedFile) {
        self.update_file(path, file);
    }

    /// Remove `path` and retire all of its derived-index entries.
    /// Reverse-index cleanup must happen before the file is dropped from the
    /// map, because it reads the file's reference list.
    pub fn remove_file(&mut self, path: &str) {
        self.remove_reverse_index_for_path(path);
        if self.files.remove(path).is_some() {
            self.trigram_index.remove_file(path);
            self.remove_path_indices_for_path(path);
            self.loaded_at_system = SystemTime::now();
        }
    }

    /// Drop every reverse-index entry that points into `path`, pruning
    /// now-empty name buckets. No-op if the file is not in the map.
    fn remove_reverse_index_for_path(&mut self, path: &str) {
        if let Some(file) = self.files.get(path) {
            let names: Vec<String> = file.references.iter().map(|r| r.name.clone()).collect();
            for name in names {
                if let Some(locs) = self.reverse_index.get_mut(&name) {
                    locs.retain(|loc| loc.file_path != path);
                    if locs.is_empty() {
                        self.reverse_index.remove(&name);
                    }
                }
            }
        }
    }

    /// Add reverse-index entries for every reference in `path`'s current
    /// version. No-op if the file is not in the map.
    fn insert_reverse_index_for_path(&mut self, path: &str) {
        if let Some(file) = self.files.get(path) {
            for (reference_idx, reference) in file.references.iter().enumerate() {
                self.reverse_index
                    .entry(reference.name.clone())
                    .or_default()
                    .push(ReferenceLocation {
                        file_path: path.to_string(),
                        reference_idx: reference_idx as u32,
                    });
            }
        }
    }

    /// Rebuild the reverse index from scratch from the file map.
    pub(crate) fn rebuild_reverse_index(&mut self) {
        self.reverse_index = build_reverse_index_from_files(&self.files);
    }

    /// Rebuild both path-lookup maps from scratch from the file map.
    pub(crate) fn rebuild_path_indices(&mut self) {
        let (by_basename, by_dir_component) = build_path_indices_from_files(&self.files);
        self.files_by_basename = by_basename;
        self.files_by_dir_component = by_dir_component;
    }

    /// Restore derived-index consistency for `path` after a mutation
    /// panicked partway through: purge every entry referring to `path`,
    /// then re-insert from the file map if the file still exists.
    pub(crate) fn repair_file_indices(&mut self, path: &str) {
        // Purge all reverse-index locations for this path, dropping buckets
        // that become empty.
        self.reverse_index.retain(|_name, locs| {
            locs.retain(|loc| loc.file_path != path);
            !locs.is_empty()
        });

        self.remove_path_indices_for_path(path);

        if self.files.contains_key(path) {
            if let Some(file) = self.files.get(path) {
                self.trigram_index.update_file(path, &file.content);
            }
            self.insert_reverse_index_for_path(path);
            self.insert_path_indices_for_path(path);
        } else {
            self.trigram_index.remove_file(path);
        }

        tracing::info!("repaired auxiliary indices for '{path}'");
    }

    /// Add `path` to the basename and directory-component lookup maps.
    fn insert_path_indices_for_path(&mut self, path: &str) {
        if let Some(basename) = basename_key(path) {
            insert_sorted_unique(self.files_by_basename.entry(basename).or_default(), path);
        }

        for component in dir_component_keys(path) {
            insert_sorted_unique(
                self.files_by_dir_component.entry(component).or_default(),
                path,
            );
        }
    }

    /// Remove `path` from both path-lookup maps, pruning empty buckets.
    fn remove_path_indices_for_path(&mut self, path: &str) {
        if let Some(basename) = basename_key(path)
            && let Some(paths) = self.files_by_basename.get_mut(&basename)
        {
            remove_sorted_path(paths, path);
            if paths.is_empty() {
                self.files_by_basename.remove(&basename);
            }
        }

        for component in dir_component_keys(path) {
            if let Some(paths) = self.files_by_dir_component.get_mut(&component) {
                remove_sorted_path(paths, path);
                if paths.is_empty() {
                    self.files_by_dir_component.remove(&component);
                }
            }
        }
    }

    /// Origin of this index.
    pub fn load_source(&self) -> IndexLoadSource {
        self.load_source
    }

    /// Snapshot verification progress.
    pub fn snapshot_verify_state(&self) -> SnapshotVerifyState {
        self.snapshot_verify_state
    }

    /// Advance verification to Running — only meaningful after a snapshot
    /// restore; otherwise a no-op.
    pub(crate) fn mark_snapshot_verify_running(&mut self) {
        if self.load_source == IndexLoadSource::SnapshotRestore {
            self.snapshot_verify_state = SnapshotVerifyState::Running;
        }
    }

    /// Advance verification to Completed — only meaningful after a snapshot
    /// restore; otherwise a no-op.
    pub(crate) fn mark_snapshot_verify_completed(&mut self) {
        if self.load_source == IndexLoadSource::SnapshotRestore {
            self.snapshot_verify_state = SnapshotVerifyState::Completed;
        }
    }
}
1339
/// Lowercased final path component, or `None` when the path has no basename
/// (empty path, or one ending in `..`) or the name is not valid UTF-8.
fn basename_key(path: &str) -> Option<String> {
    let name = Path::new(path).file_name()?.to_str()?;
    Some(name.to_ascii_lowercase())
}
1346
/// Sorted, deduplicated, lowercased directory components of `path`
/// (everything except the final component). Accepts both `/` and `\`
/// separators; returns an empty vector when there is no directory part.
fn dir_component_keys(path: &str) -> Vec<String> {
    let components: Vec<&str> = path
        .split(['/', '\\'])
        .filter(|part| !part.is_empty())
        .collect();
    let Some((_file, dirs)) = components.split_last() else {
        return Vec::new();
    };

    let mut seen = HashSet::new();
    let mut keys: Vec<String> = dirs
        .iter()
        .map(|part| part.to_ascii_lowercase())
        .filter(|key| seen.insert(key.clone()))
        .collect();
    keys.sort();
    keys
}
1367
/// Insert `path` into an already-sorted vector, preserving sort order and
/// skipping the insert when the path is already present.
fn insert_sorted_unique(paths: &mut Vec<String>, path: &str) {
    if let Err(pos) = paths.binary_search_by(|existing| existing.as_str().cmp(path)) {
        paths.insert(pos, path.to_string());
    }
}
1374
/// Remove `path` from an already-sorted vector; no-op when absent.
fn remove_sorted_path(paths: &mut Vec<String>, path: &str) {
    let Ok(pos) = paths.binary_search_by(|existing| existing.as_str().cmp(path)) else {
        return;
    };
    paths.remove(pos);
}
1380
1381#[cfg(test)]
1382mod tests {
1383 use super::*;
1384 use crate::domain::{
1385 FileOutcome, LanguageId, ReferenceKind, ReferenceRecord, SymbolKind, SymbolRecord,
1386 };
1387 use std::fs;
1388 use tempfile::TempDir;
1389
    /// Minimal `SymbolRecord` fixture: a top-level function named `foo`
    /// occupying bytes 0..10 on the first line.
    fn dummy_symbol() -> SymbolRecord {
        let byte_range = (0, 10);
        SymbolRecord {
            name: "foo".to_string(),
            kind: SymbolKind::Function,
            depth: 0,
            sort_order: 0,
            byte_range,
            item_byte_range: Some(byte_range),
            line_range: (0, 1),
            doc_byte_range: None,
        }
    }
1403
    /// `FileProcessingResult` fixture for a Rust file `test.rs` with the
    /// given `outcome` and `symbols`; all other fields are fixed dummies.
    fn make_result(outcome: FileOutcome, symbols: Vec<SymbolRecord>) -> FileProcessingResult {
        FileProcessingResult {
            relative_path: "test.rs".to_string(),
            language: LanguageId::Rust,
            classification: crate::domain::FileClassification::for_code_path("test.rs"),
            outcome,
            parse_diagnostic: None,
            symbols,
            byte_len: 42,
            content_hash: "abc123".to_string(),
            references: vec![],
            alias_map: std::collections::HashMap::new(),
        }
    }
1418
1419 #[test]
1422 fn test_indexed_file_maps_processed_status() {
1423 let result = make_result(FileOutcome::Processed, vec![dummy_symbol()]);
1424 let indexed = IndexedFile::from_parse_result(result, b"fn foo() {}".to_vec());
1425 assert_eq!(indexed.parse_status, ParseStatus::Parsed);
1426 assert_eq!(indexed.symbols.len(), 1);
1427 }
1428
1429 #[test]
1430 fn test_indexed_file_maps_partial_parse_keeps_symbols() {
1431 let result = make_result(
1432 FileOutcome::PartialParse {
1433 warning: "syntax error".to_string(),
1434 },
1435 vec![dummy_symbol()],
1436 );
1437 let indexed = IndexedFile::from_parse_result(result, b"fn bad(".to_vec());
1438 assert!(matches!(
1439 indexed.parse_status,
1440 ParseStatus::PartialParse { .. }
1441 ));
1442 assert_eq!(
1443 indexed.symbols.len(),
1444 1,
1445 "symbols kept even on partial parse"
1446 );
1447 }
1448
1449 #[test]
1450 fn test_indexed_file_maps_failed_status_empty_symbols_content_preserved() {
1451 let result = make_result(
1452 FileOutcome::Failed {
1453 error: "parse failed".to_string(),
1454 },
1455 vec![],
1456 );
1457 let content = b"some content bytes".to_vec();
1458 let indexed = IndexedFile::from_parse_result(result, content.clone());
1459 assert!(matches!(indexed.parse_status, ParseStatus::Failed { .. }));
1460 assert!(indexed.symbols.is_empty(), "failed parse has no symbols");
1461 assert_eq!(
1462 indexed.content, content,
1463 "content bytes stored even on failure"
1464 );
1465 }
1466
1467 #[test]
1470 fn test_circuit_breaker_does_not_trip_at_20pct_of_10_files() {
1471 let cb = CircuitBreakerState::new(0.20);
1473 for _ in 0..8 {
1474 cb.record_success();
1475 }
1476 for i in 0..2 {
1477 cb.record_failure(&format!("file{i}.rs"), "error");
1478 }
1479 assert!(
1480 !cb.should_abort(),
1481 "2/10 = 20% should NOT trip (threshold not exceeded)"
1482 );
1483 }
1484
1485 #[test]
1486 fn test_circuit_breaker_trips_at_30pct_of_10_files() {
1487 let cb = CircuitBreakerState::new(0.20);
1489 for _ in 0..7 {
1490 cb.record_success();
1491 }
1492 for i in 0..3 {
1493 cb.record_failure(&format!("file{i}.rs"), "error");
1494 }
1495 assert!(cb.should_abort(), "3/10 = 30% should trip");
1496 }
1497
1498 #[test]
1499 fn test_circuit_breaker_does_not_trip_on_tiny_repos() {
1500 let cb = CircuitBreakerState::new(0.20);
1502 cb.record_failure("a.rs", "err");
1503 cb.record_failure("b.rs", "err");
1504 cb.record_failure("c.rs", "err");
1505 assert!(
1507 !cb.should_abort(),
1508 "< 5 files processed: circuit breaker must not trip"
1509 );
1510 }
1511
1512 #[test]
1513 fn test_circuit_breaker_threshold_configurable() {
1514 let cb = CircuitBreakerState::new(0.10);
1516 for _ in 0..9 {
1517 cb.record_success();
1518 }
1519 cb.record_failure("file.rs", "error");
1520 assert!(!cb.should_abort(), "10% == threshold, not exceeded");
1522
1523 let cb2 = CircuitBreakerState::new(0.10);
1525 for _ in 0..8 {
1526 cb2.record_success();
1527 }
1528 for i in 0..2 {
1529 cb2.record_failure(&format!("file{i}.rs"), "error");
1530 }
1531 assert!(cb2.should_abort(), "20% > 10% threshold should trip");
1533 }
1534
1535 fn write_file(dir: &Path, name: &str, content: &str) {
1538 let path = dir.join(name);
1539 if let Some(p) = path.parent() {
1540 fs::create_dir_all(p).unwrap();
1541 }
1542 fs::write(path, content).unwrap();
1543 }
1544
    /// Loading a directory of valid files across several languages yields a
    /// non-tripped, fully-counted index with `FreshLoad` provenance and no
    /// snapshot verification pending.
    #[test]
    fn test_live_index_load_valid_files_produces_ready_state() {
        let tmp = TempDir::new().unwrap();
        write_file(tmp.path(), "a.rs", "fn alpha() {}");
        write_file(tmp.path(), "b.py", "def beta(): pass");
        write_file(tmp.path(), "c.js", "function gamma() {}");
        write_file(tmp.path(), "d.ts", "function delta(): void {}");
        write_file(tmp.path(), "e.go", "package main\nfunc epsilon() {}");

        let shared = LiveIndex::load(tmp.path()).unwrap();
        let index = shared.read();
        assert!(
            !index.cb_state.is_tripped(),
            "valid files should not trip circuit breaker"
        );
        assert_eq!(index.file_count(), 5);
        assert_eq!(index.load_source(), IndexLoadSource::FreshLoad);
        assert_eq!(
            index.snapshot_verify_state(),
            SnapshotVerifyState::NotNeeded
        );
    }
1567
    /// A mix of languages (including Swift/PHP/Perl) that all parse cleanly
    /// must leave the circuit breaker untripped.
    #[test]
    fn test_live_index_load_circuit_breaker_not_tripped_with_all_languages() {
        let tmp = TempDir::new().unwrap();
        write_file(tmp.path(), "a.rs", "fn alpha() {}");
        write_file(tmp.path(), "b.py", "def beta(): pass");
        write_file(tmp.path(), "c.js", "function gamma() {}");
        write_file(tmp.path(), "x.swift", "class A {}");
        write_file(tmp.path(), "y.php", "<?php class B {}");
        write_file(tmp.path(), "z.pl", "sub greet { print \"hi\"; }");

        let shared = LiveIndex::load(tmp.path()).unwrap();
        let index = shared.read();
        assert!(
            !index.cb_state.is_tripped(),
            "all-parseable files should not trip circuit breaker"
        );
    }
1588
1589 #[test]
1590 fn test_live_index_file_count() {
1591 let tmp = TempDir::new().unwrap();
1592 write_file(tmp.path(), "a.rs", "fn a() {}");
1593 write_file(tmp.path(), "b.rs", "fn b() {}");
1594 write_file(tmp.path(), "c.rs", "fn c() {}");
1595
1596 let shared = LiveIndex::load(tmp.path()).unwrap();
1597 let index = shared.read();
1598 assert_eq!(index.file_count(), 3);
1599 }
1600
1601 #[test]
1602 fn test_live_index_symbol_count() {
1603 let tmp = TempDir::new().unwrap();
1604 write_file(tmp.path(), "a.rs", "fn foo() {}\nfn bar() {}");
1605 write_file(tmp.path(), "b.rs", "fn baz() {}");
1606
1607 let shared = LiveIndex::load(tmp.path()).unwrap();
1608 let index = shared.read();
1609 assert_eq!(index.symbol_count(), 3);
1611 }
1612
1613 #[test]
1616 fn test_live_index_empty_has_zero_files() {
1617 let shared = LiveIndex::empty();
1618 let index = shared.read();
1619 assert_eq!(index.file_count(), 0);
1620 assert_eq!(index.load_source(), IndexLoadSource::EmptyBootstrap);
1621 assert_eq!(
1622 index.snapshot_verify_state(),
1623 SnapshotVerifyState::NotNeeded
1624 );
1625 }
1626
1627 #[test]
1628 fn test_shared_index_handle_preserves_read_write_access() {
1629 let shared = LiveIndex::empty();
1630 {
1631 let mut live = shared.write();
1632 live.add_file(
1633 "src/new.rs".to_string(),
1634 make_indexed_file_for_mutation("src/new.rs"),
1635 );
1636 }
1637
1638 let index = shared.read();
1639 assert!(index.get_file("src/new.rs").is_some());
1640 }
1641
    /// Published state starts at generation 0 / `Empty`, then the generation
    /// bumps and counts follow each add/remove mutation.
    #[test]
    fn test_shared_index_handle_published_state_tracks_generation_and_counts() {
        let shared = LiveIndex::empty();
        let initial = shared.published_state();
        assert_eq!(initial.generation, 0);
        assert_eq!(initial.status, PublishedIndexStatus::Empty);
        assert_eq!(initial.degraded_summary, None);
        assert_eq!(initial.file_count, 0);
        assert_eq!(initial.parsed_count, 0);
        assert_eq!(initial.partial_parse_count, 0);
        assert_eq!(initial.failed_count, 0);
        assert_eq!(initial.load_source, IndexLoadSource::EmptyBootstrap);

        // First mutation: generation 1, counts reflect the single parsed file.
        shared.add_file(
            "src/new.rs".to_string(),
            make_indexed_file_for_mutation("src/new.rs"),
        );
        let after_add = shared.published_state();
        assert_eq!(after_add.generation, 1);
        assert_eq!(after_add.status, PublishedIndexStatus::Ready);
        assert_eq!(after_add.degraded_summary, None);
        assert_eq!(after_add.file_count, 1);
        assert_eq!(after_add.parsed_count, 1);
        assert_eq!(after_add.partial_parse_count, 0);
        assert_eq!(after_add.failed_count, 0);
        assert_eq!(after_add.symbol_count, 1);

        // Second mutation: generation 2, counts back to zero.
        shared.remove_file("src/new.rs");
        let after_remove = shared.published_state();
        assert_eq!(after_remove.generation, 2);
        assert_eq!(after_remove.status, PublishedIndexStatus::Ready);
        assert_eq!(after_remove.degraded_summary, None);
        assert_eq!(after_remove.file_count, 0);
        assert_eq!(after_remove.symbol_count, 0);
    }
1677
    /// Mutations made through an explicit write guard are published (with a
    /// generation bump) when the guard is dropped.
    #[test]
    fn test_shared_index_handle_write_guard_publishes_on_drop() {
        let shared = LiveIndex::empty();

        {
            let mut live = shared.write();
            live.add_file(
                "src/new.rs".to_string(),
                make_indexed_file_for_mutation("src/new.rs"),
            );
        }

        let after_add = shared.published_state();
        assert_eq!(after_add.generation, 1);
        assert_eq!(after_add.status, PublishedIndexStatus::Ready);
        assert_eq!(after_add.degraded_summary, None);
        assert_eq!(after_add.file_count, 1);

        {
            let mut live = shared.write();
            live.remove_file("src/new.rs");
        }

        let after_remove = shared.published_state();
        assert_eq!(after_remove.generation, 2);
        assert_eq!(after_remove.status, PublishedIndexStatus::Ready);
        assert_eq!(after_remove.degraded_summary, None);
        assert_eq!(after_remove.file_count, 0);
    }
1707
    /// For a snapshot-restored index, the verify-state transitions
    /// (`Running` → `Completed`) are published with generation bumps.
    #[test]
    fn test_shared_index_handle_published_state_tracks_verify_transitions() {
        let mut live = make_empty_live_index();
        live.is_empty = false;
        live.load_source = IndexLoadSource::SnapshotRestore;
        live.snapshot_verify_state = SnapshotVerifyState::Pending;
        let shared = SharedIndexHandle::shared(live);

        shared.mark_snapshot_verify_running();
        let running = shared.published_state();
        assert_eq!(running.generation, 1);
        assert_eq!(running.status, PublishedIndexStatus::Ready);
        assert_eq!(running.degraded_summary, None);
        assert_eq!(running.snapshot_verify_state, SnapshotVerifyState::Running);

        shared.mark_snapshot_verify_completed();
        let completed = shared.published_state();
        assert_eq!(completed.generation, 2);
        assert_eq!(
            completed.snapshot_verify_state,
            SnapshotVerifyState::Completed
        );
    }
1731
    /// A tripped circuit breaker surfaces as `Degraded` status together with
    /// a human-readable summary mentioning the trip.
    #[test]
    fn test_shared_index_handle_published_state_captures_degraded_summary() {
        let mut live = make_empty_live_index();
        live.is_empty = false;
        // 3 failures out of 10 processed files exceeds the 0.20 threshold.
        for _ in 0..3 {
            live.cb_state.record_failure("src/bad.rs", "parse failure");
        }
        for _ in 0..7 {
            live.cb_state.record_success();
        }
        assert!(live.cb_state.should_abort(), "circuit breaker should trip");
        let shared = SharedIndexHandle::shared(live);

        let published = shared.published_state();
        assert_eq!(published.status, PublishedIndexStatus::Degraded);
        assert!(
            published
                .degraded_summary
                .as_deref()
                .is_some_and(|summary| summary.contains("circuit breaker tripped")),
            "expected degraded summary, got {:?}",
            published.degraded_summary
        );
    }
1756
    /// The published repo outline (file/symbol totals plus per-file entries)
    /// follows add and remove mutations.
    #[test]
    fn test_shared_index_handle_published_repo_outline_tracks_mutations() {
        let shared = LiveIndex::empty();

        let initial = shared.published_repo_outline();
        assert_eq!(initial.total_files, 0);
        assert_eq!(initial.total_symbols, 0);
        assert!(initial.files.is_empty());

        shared.add_file(
            "src/main.rs".to_string(),
            make_indexed_file_for_mutation("src/main.rs"),
        );
        let after_add = shared.published_repo_outline();
        assert_eq!(after_add.total_files, 1);
        assert_eq!(after_add.total_symbols, 1);
        assert_eq!(after_add.files[0].relative_path, "src/main.rs");

        {
            let mut live = shared.write();
            live.remove_file("src/main.rs");
        }
        let after_remove = shared.published_repo_outline();
        assert_eq!(after_remove.total_files, 0);
        assert_eq!(after_remove.total_symbols, 0);
        assert!(after_remove.files.is_empty());
    }
1784
1785 #[test]
1786 fn test_live_index_empty_returns_empty_state() {
1787 let shared = LiveIndex::empty();
1788 let index = shared.read();
1789 assert_eq!(index.index_state(), IndexState::Empty);
1790 }
1791
1792 #[test]
1793 fn test_live_index_empty_is_not_ready() {
1794 let shared = LiveIndex::empty();
1795 let index = shared.read();
1796 assert!(!index.is_ready(), "empty index should not be ready");
1797 }
1798
    /// `reload` on an empty index ingests the given root and flips the index
    /// to `Ready` with `FreshLoad` provenance.
    #[test]
    fn test_live_index_reload_loads_files_and_becomes_ready() {
        let tmp = TempDir::new().unwrap();
        write_file(tmp.path(), "a.rs", "fn alpha() {}");
        write_file(tmp.path(), "b.rs", "fn beta() {}");

        let shared = LiveIndex::empty();
        {
            let mut index = shared.write();
            index.reload(tmp.path()).expect("reload should succeed");
        }
        let index = shared.read();
        assert_eq!(index.file_count(), 2);
        assert!(index.is_ready(), "after reload should be ready");
        assert_eq!(index.index_state(), IndexState::Ready);
        assert_eq!(index.load_source(), IndexLoadSource::FreshLoad);
        assert_eq!(
            index.snapshot_verify_state(),
            SnapshotVerifyState::NotNeeded
        );
    }
1820
1821 #[test]
1822 fn test_live_index_reload_invalid_root_returns_error() {
1823 let shared = LiveIndex::empty();
1824 let mut index = shared.write();
1825 let result = index.reload(Path::new("/nonexistent/path/that/does/not/exist"));
1826 assert!(
1827 result.is_err(),
1828 "reload on invalid root should return error"
1829 );
1830 }
1831
    /// `loaded_at_system` is stamped at construction time: it must fall
    /// between timestamps taken immediately before and after creation.
    #[test]
    fn test_live_index_loaded_at_system_is_recent() {
        use std::time::SystemTime;
        let before = SystemTime::now();
        let shared = LiveIndex::empty();
        let index = shared.read();
        let after = SystemTime::now();
        let ts = index.loaded_at_system();
        assert!(
            ts >= before,
            "loaded_at_system should be >= before creation"
        );
        assert!(ts <= after, "loaded_at_system should be <= after creation");
    }
1846
    /// Eight threads reading through the shared handle concurrently must all
    /// finish without panicking or deadlocking.
    #[test]
    fn test_concurrent_readers_no_deadlock() {
        use std::thread;

        let tmp = TempDir::new().unwrap();
        write_file(tmp.path(), "a.rs", "fn foo() {}");
        write_file(tmp.path(), "b.rs", "fn bar() {}");
        write_file(tmp.path(), "c.rs", "fn baz() {}");

        let shared = LiveIndex::load(tmp.path()).unwrap();

        let handles: Vec<_> = (0..8)
            .map(|_| {
                let shared_clone = Arc::clone(&shared);
                thread::spawn(move || {
                    let index = shared_clone.read();
                    let _ = index.file_count();
                    let _ = index.symbol_count();
                })
            })
            .collect();

        for h in handles {
            h.join().expect("reader thread should not panic");
        }
    }
1873
    /// `IndexedFile` fixture at `path`: successfully parsed, one dummy
    /// symbol, fixed content/hash, no references.
    fn make_indexed_file_for_mutation(path: &str) -> IndexedFile {
        IndexedFile {
            relative_path: path.to_string(),
            language: LanguageId::Rust,
            classification: crate::domain::FileClassification::for_code_path(path),
            content: b"fn test() {}".to_vec(),
            symbols: vec![dummy_symbol()],
            parse_status: ParseStatus::Parsed,
            parse_diagnostic: None,
            byte_len: 12,
            content_hash: "abc123".to_string(),
            references: vec![],
            alias_map: std::collections::HashMap::new(),
            mtime_secs: 0,
        }
    }
1892
    /// Construct a `LiveIndex` directly (bypassing discovery/parsing) with
    /// all-empty collections and a 0.20 circuit-breaker threshold.
    fn make_empty_live_index() -> LiveIndex {
        LiveIndex {
            files: HashMap::new(),
            loaded_at: Instant::now(),
            loaded_at_system: SystemTime::now(),
            load_duration: Duration::ZERO,
            cb_state: CircuitBreakerState::new(0.20),
            is_empty: false,
            load_source: IndexLoadSource::FreshLoad,
            snapshot_verify_state: SnapshotVerifyState::NotNeeded,
            reverse_index: HashMap::new(),
            files_by_basename: HashMap::new(),
            files_by_dir_component: HashMap::new(),
            trigram_index: crate::live_index::trigram::TrigramIndex::new(),
            gitignore: None,
            skipped_files: Vec::new(),
        }
    }
1911
    /// After a fresh load, the basename index groups same-named files across
    /// directories and the dir-component index maps each directory to its
    /// files.
    #[test]
    fn test_live_index_load_builds_path_indices() {
        let dir = TempDir::new().expect("failed to create tempdir");
        fs::create_dir_all(dir.path().join("src")).expect("failed to create src dir");
        fs::create_dir_all(dir.path().join("tests")).expect("failed to create tests dir");
        write_file(dir.path(), "src/lib.rs", "pub fn lib_fn() {}");
        write_file(dir.path(), "tests/lib.rs", "fn test_lib() {}");

        let shared = LiveIndex::load(dir.path()).expect("LiveIndex::load failed");
        let index = shared.read();

        assert_eq!(
            index.files_by_basename.get("lib.rs"),
            Some(&vec!["src/lib.rs".to_string(), "tests/lib.rs".to_string()])
        );
        assert_eq!(
            index.files_by_dir_component.get("src"),
            Some(&vec!["src/lib.rs".to_string()])
        );
        assert_eq!(
            index.files_by_dir_component.get("tests"),
            Some(&vec!["tests/lib.rs".to_string()])
        );
    }
1936
    /// `reload` drops path-index entries for files that disappeared on disk
    /// and adds entries for newly created ones.
    #[test]
    fn test_live_index_reload_rebuilds_path_indices() {
        let dir = TempDir::new().expect("failed to create tempdir");
        fs::create_dir_all(dir.path().join("src")).expect("failed to create src dir");
        write_file(dir.path(), "src/alpha.rs", "fn alpha() {}");

        let shared = LiveIndex::load(dir.path()).expect("LiveIndex::load failed");

        // Mutate the tree on disk: alpha gone, beta added.
        fs::remove_file(dir.path().join("src/alpha.rs")).expect("failed to remove alpha");
        fs::create_dir_all(dir.path().join("tests")).expect("failed to create tests dir");
        write_file(dir.path(), "tests/beta.rs", "fn beta() {}");

        {
            let mut index = shared.write();
            index.reload(dir.path()).expect("reload should succeed");
        }

        let index = shared.read();
        assert!(!index.files_by_basename.contains_key("alpha.rs"));
        assert_eq!(
            index.files_by_basename.get("beta.rs"),
            Some(&vec!["tests/beta.rs".to_string()])
        );
        assert!(!index.files_by_dir_component.contains_key("src"));
        assert_eq!(
            index.files_by_dir_component.get("tests"),
            Some(&vec!["tests/beta.rs".to_string()])
        );
    }
1966
1967 #[test]
1968 fn test_dir_component_keys_deduplicate_and_accept_backslashes() {
1969 assert_eq!(
1970 dir_component_keys("src\\live_index\\src\\store.rs"),
1971 vec!["live_index".to_string(), "src".to_string()]
1972 );
1973 }
1974
    /// `update_file` inserts a previously-unknown file, registers it in both
    /// path indices, and refreshes `loaded_at_system`.
    #[test]
    fn test_update_file_inserts_and_updates_timestamp() {
        let mut index = make_empty_live_index();
        let before = SystemTime::now();
        let file = make_indexed_file_for_mutation("src/new.rs");
        index.update_file("src/new.rs".to_string(), file);
        let after = SystemTime::now();

        assert!(
            index.get_file("src/new.rs").is_some(),
            "file should be inserted"
        );
        assert_eq!(
            index.files_by_basename.get("new.rs"),
            Some(&vec!["src/new.rs".to_string()])
        );
        assert_eq!(
            index.files_by_dir_component.get("src"),
            Some(&vec!["src/new.rs".to_string()])
        );
        let ts = index.loaded_at_system;
        assert!(ts >= before, "loaded_at_system should be >= before update");
        assert!(ts <= after, "loaded_at_system should be <= after update");
    }
1999
2000 #[test]
2001 fn test_update_file_replaces_existing() {
2002 let mut index = make_empty_live_index();
2003 let file1 = IndexedFile {
2004 relative_path: "src/foo.rs".to_string(),
2005 language: LanguageId::Rust,
2006 classification: crate::domain::FileClassification::for_code_path("src/foo.rs"),
2007 content: b"fn old() {}".to_vec(),
2008 symbols: vec![],
2009 parse_status: ParseStatus::Parsed,
2010 parse_diagnostic: None,
2011 byte_len: 11,
2012 content_hash: "old_hash".to_string(),
2013 references: vec![],
2014 alias_map: std::collections::HashMap::new(),
2015 mtime_secs: 0,
2016 };
2017 index.update_file("src/foo.rs".to_string(), file1);
2018
2019 let file2 = IndexedFile {
2020 relative_path: "src/foo.rs".to_string(),
2021 language: LanguageId::Rust,
2022 classification: crate::domain::FileClassification::for_code_path("src/foo.rs"),
2023 content: b"fn new() {}".to_vec(),
2024 symbols: vec![dummy_symbol()],
2025 parse_status: ParseStatus::Parsed,
2026 parse_diagnostic: None,
2027 byte_len: 11,
2028 content_hash: "new_hash".to_string(),
2029 references: vec![],
2030 alias_map: std::collections::HashMap::new(),
2031 mtime_secs: 0,
2032 };
2033 index.update_file("src/foo.rs".to_string(), file2);
2034
2035 let retrieved = index.get_file("src/foo.rs").unwrap();
2036 assert_eq!(
2037 retrieved.content_hash, "new_hash",
2038 "should have replaced the file"
2039 );
2040 assert_eq!(index.file_count(), 1, "should still have exactly 1 file");
2041 assert_eq!(
2042 index.files_by_basename.get("foo.rs"),
2043 Some(&vec!["src/foo.rs".to_string()])
2044 );
2045 assert_eq!(
2046 index.files_by_dir_component.get("src"),
2047 Some(&vec!["src/foo.rs".to_string()])
2048 );
2049 }
2050
2051 #[test]
2052 fn test_add_file_inserts_new() {
2053 let mut index = make_empty_live_index();
2054 assert_eq!(index.file_count(), 0);
2055
2056 let file = make_indexed_file_for_mutation("src/new.rs");
2057 index.add_file("src/new.rs".to_string(), file);
2058
2059 assert_eq!(
2060 index.file_count(),
2061 1,
2062 "file count should increase by 1 after add_file"
2063 );
2064 assert!(index.get_file("src/new.rs").is_some());
2065 }
2066
2067 #[test]
2068 fn test_remove_file_removes_existing() {
2069 let mut index = make_empty_live_index();
2070 let file = make_indexed_file_for_mutation("src/to_delete.rs");
2071 index.update_file("src/to_delete.rs".to_string(), file);
2072 assert_eq!(index.file_count(), 1);
2073
2074 index.remove_file("src/to_delete.rs");
2075 assert!(
2076 index.get_file("src/to_delete.rs").is_none(),
2077 "file should be removed"
2078 );
2079 assert_eq!(index.file_count(), 0);
2080 assert!(!index.files_by_basename.contains_key("to_delete.rs"));
2081 assert!(!index.files_by_dir_component.contains_key("src"));
2082 }
2083
    /// Removing an unknown path must not touch `loaded_at_system` (the sleep
    /// guarantees a changed timestamp would be observable).
    #[test]
    fn test_remove_file_nonexistent_is_noop() {
        let mut index = make_empty_live_index();
        let known_ts = index.loaded_at_system;
        std::thread::sleep(Duration::from_millis(5));

        index.remove_file("nonexistent.rs");

        assert_eq!(
            index.loaded_at_system, known_ts,
            "loaded_at_system must NOT change when removing non-existent file"
        );
    }
2099
    /// `file_count` tracks add/update/remove correctly: updates and removes
    /// of unknown paths do not change the count.
    #[test]
    fn test_file_count_after_mutations() {
        let mut index = make_empty_live_index();
        assert_eq!(index.file_count(), 0);

        index.add_file("a.rs".to_string(), make_indexed_file_for_mutation("a.rs"));
        assert_eq!(index.file_count(), 1);

        index.add_file("b.rs".to_string(), make_indexed_file_for_mutation("b.rs"));
        assert_eq!(index.file_count(), 2);

        index.update_file("a.rs".to_string(), make_indexed_file_for_mutation("a.rs"));
        assert_eq!(index.file_count(), 2, "update does not add a new entry");

        index.remove_file("a.rs");
        assert_eq!(index.file_count(), 1);

        index.remove_file("nonexistent.rs");
        assert_eq!(
            index.file_count(),
            1,
            "removing nonexistent does not change count"
        );
    }
2124
    /// `ReferenceRecord` fixture with the given name/kind on a single line;
    /// byte range and enclosing symbol are dummies.
    fn make_ref(name: &str, kind: ReferenceKind, line: u32) -> ReferenceRecord {
        ReferenceRecord {
            name: name.to_string(),
            qualified_name: None,
            kind,
            byte_range: (0, 1),
            line_range: (line, line),
            enclosing_symbol_index: None,
        }
    }
2137
    /// `IndexedFile` fixture at `path` carrying the given references and no
    /// symbols; other fields are fixed dummies.
    fn make_indexed_file_with_refs(path: &str, refs: Vec<ReferenceRecord>) -> IndexedFile {
        IndexedFile {
            relative_path: path.to_string(),
            language: LanguageId::Rust,
            classification: crate::domain::FileClassification::for_code_path(path),
            content: b"fn test() {}".to_vec(),
            symbols: vec![],
            parse_status: ParseStatus::Parsed,
            parse_diagnostic: None,
            byte_len: 12,
            content_hash: "abc".to_string(),
            references: refs,
            alias_map: std::collections::HashMap::new(),
            mtime_secs: 0,
        }
    }
2154
    /// `from_parse_result` carries the references and alias map over from
    /// the processing result unchanged.
    #[test]
    fn test_indexed_file_from_parse_result_transfers_refs_and_alias_map() {
        use std::collections::HashMap;
        let mut alias_map = HashMap::new();
        alias_map.insert("Map".to_string(), "HashMap".to_string());
        let refs = vec![make_ref("foo", ReferenceKind::Call, 1)];

        let result = FileProcessingResult {
            relative_path: "test.rs".to_string(),
            language: LanguageId::Rust,
            classification: crate::domain::FileClassification::for_code_path("test.rs"),
            outcome: FileOutcome::Processed,
            parse_diagnostic: None,
            symbols: vec![],
            byte_len: 0,
            content_hash: "abc".to_string(),
            references: refs.clone(),
            alias_map: alias_map.clone(),
        };

        let indexed = IndexedFile::from_parse_result(result, vec![]);
        assert_eq!(indexed.references.len(), 1);
        assert_eq!(indexed.references[0].name, "foo");
        assert_eq!(
            indexed.alias_map.get("Map").map(|s| s.as_str()),
            Some("HashMap")
        );
    }
2183
    /// Adding files populates the reverse index: each reference name maps to
    /// every (file, reference-index) location that mentions it.
    #[test]
    fn test_rebuild_reverse_index_builds_name_to_locations() {
        let mut index = make_empty_live_index();

        let refs_a = vec![
            make_ref("process", ReferenceKind::Call, 5),
            make_ref("load", ReferenceKind::Call, 10),
        ];
        let refs_b = vec![make_ref("process", ReferenceKind::Call, 3)];

        index.add_file(
            "a.rs".to_string(),
            make_indexed_file_with_refs("a.rs", refs_a),
        );
        index.add_file(
            "b.rs".to_string(),
            make_indexed_file_with_refs("b.rs", refs_b),
        );

        let locs = index
            .reverse_index
            .get("process")
            .expect("process should be in reverse index");
        assert_eq!(locs.len(), 2, "process referenced in 2 files");

        // "load" appears only in a.rs, at reference index 1.
        let locs_load = index
            .reverse_index
            .get("load")
            .expect("load should be in reverse index");
        assert_eq!(locs_load.len(), 1);
        assert_eq!(locs_load[0].file_path, "a.rs");
        assert_eq!(locs_load[0].reference_idx, 1);
    }
2219
    /// `update_file` removes stale reverse-index entries from the old file
    /// contents and inserts entries for the new ones.
    #[test]
    fn test_rebuild_reverse_index_consistent_after_update_file() {
        let mut index = make_empty_live_index();

        let refs_old = vec![make_ref("old_func", ReferenceKind::Call, 1)];
        index.add_file(
            "src.rs".to_string(),
            make_indexed_file_with_refs("src.rs", refs_old),
        );
        assert!(index.reverse_index.contains_key("old_func"));

        let refs_new = vec![make_ref("new_func", ReferenceKind::Call, 1)];
        index.update_file(
            "src.rs".to_string(),
            make_indexed_file_with_refs("src.rs", refs_new),
        );

        assert!(
            !index.reverse_index.contains_key("old_func"),
            "stale entry should be gone"
        );
        assert!(
            index.reverse_index.contains_key("new_func"),
            "new entry should be present"
        );
    }
2246
    /// Removing a file purges its references from the reverse index.
    #[test]
    fn test_rebuild_reverse_index_excludes_removed_file() {
        let mut index = make_empty_live_index();

        let refs = vec![make_ref("target_fn", ReferenceKind::Call, 2)];
        index.add_file(
            "will_delete.rs".to_string(),
            make_indexed_file_with_refs("will_delete.rs", refs),
        );
        assert!(index.reverse_index.contains_key("target_fn"));

        index.remove_file("will_delete.rs");
        assert!(
            !index.reverse_index.contains_key("target_fn"),
            "removed file's refs should be gone"
        );
    }
2264
2265 #[test]
2266 fn test_reference_location_fields() {
2267 let loc = ReferenceLocation {
2268 file_path: "src/main.rs".to_string(),
2269 reference_idx: 3,
2270 };
2271 assert_eq!(loc.file_path, "src/main.rs");
2272 assert_eq!(loc.reference_idx, 3);
2273 }
2274
2275 #[test]
2276 fn test_empty_live_index_has_empty_reverse_index() {
2277 let index = make_empty_live_index();
2278 assert!(
2279 index.reverse_index.is_empty(),
2280 "fresh index should have empty reverse index"
2281 );
2282 }
2283
2284 #[test]
2285 fn test_incremental_reverse_index_matches_full_rebuild() {
2286 let mut index = make_empty_live_index();
2287
2288 let refs_a = vec![
2290 make_ref("shared_fn", ReferenceKind::Call, 1),
2291 make_ref("only_a", ReferenceKind::Call, 5),
2292 ];
2293 let refs_b = vec![
2294 make_ref("shared_fn", ReferenceKind::Call, 2),
2295 make_ref("only_b", ReferenceKind::Call, 8),
2296 ];
2297 index.add_file(
2298 "a.rs".to_string(),
2299 make_indexed_file_with_refs("a.rs", refs_a),
2300 );
2301 index.add_file(
2302 "b.rs".to_string(),
2303 make_indexed_file_with_refs("b.rs", refs_b),
2304 );
2305
2306 let refs_a_new = vec![
2308 make_ref("shared_fn", ReferenceKind::Call, 1),
2309 make_ref("replaced_a", ReferenceKind::Call, 10),
2310 ];
2311 index.update_file(
2312 "a.rs".to_string(),
2313 make_indexed_file_with_refs("a.rs", refs_a_new),
2314 );
2315
2316 let incremental: HashMap<String, Vec<(String, u32)>> = index
2318 .reverse_index
2319 .iter()
2320 .map(|(k, v)| {
2321 let mut locs: Vec<(String, u32)> = v
2322 .iter()
2323 .map(|l| (l.file_path.clone(), l.reference_idx))
2324 .collect();
2325 locs.sort();
2326 (k.clone(), locs)
2327 })
2328 .collect();
2329
2330 index.rebuild_reverse_index();
2332 let full_rebuild: HashMap<String, Vec<(String, u32)>> = index
2333 .reverse_index
2334 .iter()
2335 .map(|(k, v)| {
2336 let mut locs: Vec<(String, u32)> = v
2337 .iter()
2338 .map(|l| (l.file_path.clone(), l.reference_idx))
2339 .collect();
2340 locs.sort();
2341 (k.clone(), locs)
2342 })
2343 .collect();
2344
2345 assert_eq!(
2346 incremental, full_rebuild,
2347 "incremental update should produce same result as full rebuild"
2348 );
2349
2350 assert!(
2352 !index.reverse_index.contains_key("only_a"),
2353 "only_a should be gone after update"
2354 );
2355 assert!(
2356 index.reverse_index.contains_key("replaced_a"),
2357 "replaced_a should be present"
2358 );
2359 assert!(
2360 index.reverse_index.contains_key("only_b"),
2361 "only_b should still be present from b.rs"
2362 );
2363 let shared = index.reverse_index.get("shared_fn").unwrap();
2364 assert_eq!(shared.len(), 2, "shared_fn still referenced in both files");
2365 }
2366
2367 #[test]
2368 fn test_incremental_reverse_index_remove() {
2369 let mut index = make_empty_live_index();
2370
2371 let refs_a = vec![
2372 make_ref("common", ReferenceKind::Call, 1),
2373 make_ref("unique_a", ReferenceKind::Call, 3),
2374 ];
2375 let refs_b = vec![
2376 make_ref("common", ReferenceKind::Call, 2),
2377 make_ref("unique_b", ReferenceKind::Call, 4),
2378 ];
2379 index.add_file(
2380 "a.rs".to_string(),
2381 make_indexed_file_with_refs("a.rs", refs_a),
2382 );
2383 index.add_file(
2384 "b.rs".to_string(),
2385 make_indexed_file_with_refs("b.rs", refs_b),
2386 );
2387
2388 index.remove_file("a.rs");
2390
2391 assert!(
2393 !index.reverse_index.contains_key("unique_a"),
2394 "unique_a should be removed with a.rs"
2395 );
2396 assert!(
2398 index.reverse_index.contains_key("unique_b"),
2399 "unique_b should survive"
2400 );
2401 let common_locs = index
2403 .reverse_index
2404 .get("common")
2405 .expect("common should still exist from b.rs");
2406 assert_eq!(common_locs.len(), 1);
2407 assert_eq!(common_locs[0].file_path, "b.rs");
2408
2409 let incremental: HashMap<String, Vec<(String, u32)>> = index
2411 .reverse_index
2412 .iter()
2413 .map(|(k, v)| {
2414 let mut locs: Vec<(String, u32)> = v
2415 .iter()
2416 .map(|l| (l.file_path.clone(), l.reference_idx))
2417 .collect();
2418 locs.sort();
2419 (k.clone(), locs)
2420 })
2421 .collect();
2422
2423 index.rebuild_reverse_index();
2424 let full_rebuild: HashMap<String, Vec<(String, u32)>> = index
2425 .reverse_index
2426 .iter()
2427 .map(|(k, v)| {
2428 let mut locs: Vec<(String, u32)> = v
2429 .iter()
2430 .map(|l| (l.file_path.clone(), l.reference_idx))
2431 .collect();
2432 locs.sort();
2433 (k.clone(), locs)
2434 })
2435 .collect();
2436
2437 assert_eq!(
2438 incremental, full_rebuild,
2439 "incremental remove should match full rebuild"
2440 );
2441 }
2442
2443 #[test]
2446 fn test_circuit_breaker_deterministic_after_sort() {
2447 let mut results: Vec<(String, bool)> = vec![
2459 ("a/f00.rs".to_string(), true),
2460 ("a/f01.rs".to_string(), true),
2461 ("a/f02.rs".to_string(), true),
2462 ("a/f03.rs".to_string(), true),
2463 ("a/f04.rs".to_string(), true),
2464 ("a/f05.rs".to_string(), false),
2465 ("a/f06.rs".to_string(), false),
2466 ("a/f07.rs".to_string(), false),
2467 ("a/f08.rs".to_string(), false),
2468 ("a/f09.rs".to_string(), false),
2469 ];
2470
2471 let run_cb = |items: &[(String, bool)]| -> Option<String> {
2473 let cb = CircuitBreakerState::new(0.20);
2474 for (path, ok) in items {
2475 if *ok {
2476 cb.record_success();
2477 } else {
2478 cb.record_failure(path, "parse error");
2479 }
2480 if cb.should_abort() {
2481 return Some(path.clone());
2482 }
2483 }
2484 None
2485 };
2486
2487 results.sort_by(|a, b| a.0.cmp(&b.0));
2489 let trip_sorted = run_cb(&results);
2490
2491 results.reverse();
2493 results.sort_by(|a, b| a.0.cmp(&b.0)); let trip_sorted2 = run_cb(&results);
2495
2496 assert_eq!(
2498 trip_sorted, trip_sorted2,
2499 "sorted runs must trip at the same path"
2500 );
2501 assert!(trip_sorted.is_some(), "circuit breaker should have tripped");
2502
2503 let mut reversed: Vec<(String, bool)> = results.clone();
2505 reversed.reverse(); let trip_unsorted = run_cb(&reversed);
2507
2508 assert_ne!(
2511 trip_sorted, trip_unsorted,
2512 "unsorted order should trip at a different (earlier) path, proving sort is needed"
2513 );
2514 }
2515
2516 #[test]
2517 fn test_tier_counts() {
2518 use crate::domain::index::{AdmissionDecision, AdmissionTier, SkipReason, SkippedFile};
2519
2520 let mut index = make_empty_live_index();
2521 assert_eq!(index.tier_counts(), (0, 0, 0));
2522
2523 index.add_skipped_file(SkippedFile {
2524 path: "model.bin".into(),
2525 size: 1000,
2526 extension: Some("bin".into()),
2527 decision: AdmissionDecision::skip(
2528 AdmissionTier::MetadataOnly,
2529 SkipReason::DenylistedExtension,
2530 ),
2531 });
2532 index.add_skipped_file(SkippedFile {
2533 path: "huge.dat".into(),
2534 size: 200_000_000,
2535 extension: Some("dat".into()),
2536 decision: AdmissionDecision::skip(AdmissionTier::HardSkip, SkipReason::SizeCeiling),
2537 });
2538
2539 assert_eq!(index.tier_counts(), (0, 1, 1));
2540 }
2541}