1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::UNIX_EPOCH;
4
5use serde::{Deserialize, Serialize};
6
/// One indexed unit of source code: a detected symbol's block or a fallback slice of a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeChunk {
    /// Path of the file the chunk came from, as passed to the chunk extractor.
    pub file_path: String,
    /// Name of the detected symbol, or a synthetic name for fallback chunks.
    pub symbol_name: String,
    /// Category of the chunk.
    pub kind: ChunkKind,
    /// 1-based line on which the chunk starts.
    pub start_line: usize,
    /// 1-based line on which the chunk ends.
    pub end_line: usize,
    /// Text stored for the chunk; search snippets are taken from it.
    pub content: String,
    /// Tokens extracted for the chunk; these feed the inverted index.
    pub tokens: Vec<String>,
    /// Document length used for BM25 length normalisation.
    pub token_count: usize,
}
18
/// Coarse category assigned to a [`CodeChunk`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ChunkKind {
    /// A free function.
    Function,
    /// A struct; the line-based symbol detector also uses this for enums, traits and interfaces.
    Struct,
    /// An `impl` block.
    Impl,
    /// A file-level fallback chunk that is not tied to a single symbol.
    Module,
    /// A class.
    Class,
    /// A method (not produced by the line-based detector in this file).
    Method,
    /// Anything else (not produced by the line-based detector in this file).
    Other,
}
29
/// Cheap fingerprint of a file, compared by equality to decide whether it changed since indexing.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub struct IndexedFileState {
    /// Last-modified time in milliseconds since the Unix epoch.
    pub mtime_ms: u64,
    /// File length in bytes as reported by the file's metadata.
    pub size_bytes: u64,
}
35
36impl IndexedFileState {
37 fn from_path(path: &Path) -> Option<Self> {
38 let meta = path.metadata().ok()?;
39 let size_bytes = meta.len();
40 let mtime_ms = meta
41 .modified()
42 .ok()
43 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
44 .map(|d| d.as_millis() as u64)?;
45 Some(Self {
46 mtime_ms,
47 size_bytes,
48 })
49 }
50}
51
/// In-memory BM25 search index over [`CodeChunk`]s; serialised to JSON for persistence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BM25Index {
    /// All indexed chunks; posting entries refer to positions in this vector.
    pub chunks: Vec<CodeChunk>,
    /// Lower-cased term -> postings of `(chunk index, weight)`.
    /// One posting with weight `1.0` is appended per token occurrence.
    pub inverted: HashMap<String, Vec<(usize, f64)>>,
    /// Mean `token_count` over all chunks.
    pub avg_doc_len: f64,
    /// Number of chunks in the index.
    pub doc_count: usize,
    /// Lower-cased term -> number of distinct chunks containing it.
    pub doc_freqs: HashMap<String, usize>,
    /// Path -> file fingerprint recorded at indexing time.
    /// Defaults to empty when the serialised data has no such field.
    #[serde(default)]
    pub files: HashMap<String, IndexedFileState>,
}
62
/// A single ranked hit returned by `BM25Index::search`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Position of the matching chunk in `BM25Index::chunks`.
    pub chunk_idx: usize,
    /// Accumulated BM25 score over all query terms; higher is better.
    pub score: f64,
    /// File path copied from the matching chunk.
    pub file_path: String,
    /// Symbol name copied from the matching chunk.
    pub symbol_name: String,
    /// Chunk category copied from the matching chunk.
    pub kind: ChunkKind,
    /// Start line copied from the matching chunk.
    pub start_line: usize,
    /// End line copied from the matching chunk.
    pub end_line: usize,
    /// The first five lines of the chunk's content.
    pub snippet: String,
}
74
/// BM25 `k1` parameter: scales the term-frequency component of the score.
const BM25_K1: f64 = 1.2;
/// BM25 `b` parameter: weight of the document-length normalisation (0 = none, 1 = full).
const BM25_B: f64 = 0.75;
77
/// The default index is the empty index produced by [`BM25Index::new`].
impl Default for BM25Index {
    fn default() -> Self {
        Self::new()
    }
}
83
84impl BM25Index {
85 pub fn new() -> Self {
86 Self {
87 chunks: Vec::new(),
88 inverted: HashMap::new(),
89 avg_doc_len: 0.0,
90 doc_count: 0,
91 doc_freqs: HashMap::new(),
92 files: HashMap::new(),
93 }
94 }
95
96 pub fn build_from_directory(root: &Path) -> Self {
97 let mut index = Self::new();
98 let files = list_code_files(root);
99 for rel in files {
100 let abs = root.join(&rel);
101 let Some(state) = IndexedFileState::from_path(&abs) else {
102 continue;
103 };
104 if let Ok(content) = std::fs::read_to_string(&abs) {
105 let mut chunks = extract_chunks(&rel, &content);
106 chunks.sort_by(|a, b| {
107 a.start_line
108 .cmp(&b.start_line)
109 .then_with(|| a.end_line.cmp(&b.end_line))
110 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
111 });
112 for chunk in chunks {
113 index.add_chunk(chunk);
114 }
115 index.files.insert(rel, state);
116 }
117 }
118
119 index.finalize();
120 index
121 }
122
123 pub fn rebuild_incremental(root: &Path, prev: &BM25Index) -> Self {
124 let mut old_by_file: HashMap<String, Vec<CodeChunk>> = HashMap::new();
125 for c in &prev.chunks {
126 old_by_file
127 .entry(c.file_path.clone())
128 .or_default()
129 .push(c.clone());
130 }
131 for v in old_by_file.values_mut() {
132 v.sort_by(|a, b| {
133 a.start_line
134 .cmp(&b.start_line)
135 .then_with(|| a.end_line.cmp(&b.end_line))
136 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
137 });
138 }
139
140 let mut index = Self::new();
141 let files = list_code_files(root);
142 for rel in files {
143 let abs = root.join(&rel);
144 let Some(state) = IndexedFileState::from_path(&abs) else {
145 continue;
146 };
147
148 let unchanged = prev.files.get(&rel).is_some_and(|old| *old == state);
149 if unchanged {
150 if let Some(chunks) = old_by_file.get(&rel) {
151 for chunk in chunks {
152 index.add_chunk(chunk.clone());
153 }
154 index.files.insert(rel, state);
155 continue;
156 }
157 }
158
159 if let Ok(content) = std::fs::read_to_string(&abs) {
160 let mut chunks = extract_chunks(&rel, &content);
161 chunks.sort_by(|a, b| {
162 a.start_line
163 .cmp(&b.start_line)
164 .then_with(|| a.end_line.cmp(&b.end_line))
165 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
166 });
167 for chunk in chunks {
168 index.add_chunk(chunk);
169 }
170 index.files.insert(rel, state);
171 }
172 }
173
174 index.finalize();
175 index
176 }
177
178 fn add_chunk(&mut self, chunk: CodeChunk) {
179 let idx = self.chunks.len();
180
181 for token in &chunk.tokens {
182 let lower = token.to_lowercase();
183 self.inverted.entry(lower).or_default().push((idx, 1.0));
184 }
185
186 self.chunks.push(chunk);
187 }
188
189 fn finalize(&mut self) {
190 self.doc_count = self.chunks.len();
191 if self.doc_count == 0 {
192 return;
193 }
194
195 let total_len: usize = self.chunks.iter().map(|c| c.token_count).sum();
196 self.avg_doc_len = total_len as f64 / self.doc_count as f64;
197
198 self.doc_freqs.clear();
199 for (term, postings) in &self.inverted {
200 let unique_docs: std::collections::HashSet<usize> =
201 postings.iter().map(|(idx, _)| *idx).collect();
202 self.doc_freqs.insert(term.clone(), unique_docs.len());
203 }
204 }
205
206 pub fn search(&self, query: &str, top_k: usize) -> Vec<SearchResult> {
207 let query_tokens = tokenize(query);
208 if query_tokens.is_empty() || self.doc_count == 0 {
209 return Vec::new();
210 }
211
212 let mut scores: HashMap<usize, f64> = HashMap::new();
213
214 for token in &query_tokens {
215 let lower = token.to_lowercase();
216 let df = *self.doc_freqs.get(&lower).unwrap_or(&0) as f64;
217 if df == 0.0 {
218 continue;
219 }
220
221 let idf = ((self.doc_count as f64 - df + 0.5) / (df + 0.5) + 1.0).ln();
222
223 if let Some(postings) = self.inverted.get(&lower) {
224 let mut doc_tfs: HashMap<usize, f64> = HashMap::new();
225 for (idx, weight) in postings {
226 *doc_tfs.entry(*idx).or_insert(0.0) += weight;
227 }
228
229 for (doc_idx, tf) in &doc_tfs {
230 let doc_len = self.chunks[*doc_idx].token_count as f64;
231 let norm_len = doc_len / self.avg_doc_len.max(1.0);
232 let bm25 = idf * (tf * (BM25_K1 + 1.0))
233 / (tf + BM25_K1 * (1.0 - BM25_B + BM25_B * norm_len));
234
235 *scores.entry(*doc_idx).or_insert(0.0) += bm25;
236 }
237 }
238 }
239
240 let mut results: Vec<SearchResult> = scores
241 .into_iter()
242 .map(|(idx, score)| {
243 let chunk = &self.chunks[idx];
244 let snippet = chunk.content.lines().take(5).collect::<Vec<_>>().join("\n");
245 SearchResult {
246 chunk_idx: idx,
247 score,
248 file_path: chunk.file_path.clone(),
249 symbol_name: chunk.symbol_name.clone(),
250 kind: chunk.kind.clone(),
251 start_line: chunk.start_line,
252 end_line: chunk.end_line,
253 snippet,
254 }
255 })
256 .collect();
257
258 results.sort_by(|a, b| {
259 b.score
260 .partial_cmp(&a.score)
261 .unwrap_or(std::cmp::Ordering::Equal)
262 .then_with(|| a.file_path.cmp(&b.file_path))
263 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
264 .then_with(|| a.start_line.cmp(&b.start_line))
265 .then_with(|| a.end_line.cmp(&b.end_line))
266 });
267 results.truncate(top_k);
268 results
269 }
270
271 pub fn save(&self, root: &Path) -> std::io::Result<()> {
272 let dir = index_dir(root);
273 std::fs::create_dir_all(&dir)?;
274 let data = serde_json::to_string(self).map_err(std::io::Error::other)?;
275 let target = dir.join("bm25_index.json");
276 let tmp = dir.join("bm25_index.json.tmp");
277 std::fs::write(&tmp, data)?;
278 std::fs::rename(&tmp, &target)?;
279 Ok(())
280 }
281
282 pub fn load(root: &Path) -> Option<Self> {
283 let path = index_dir(root).join("bm25_index.json");
284 let data = std::fs::read_to_string(path).ok()?;
285 serde_json::from_str(&data).ok()
286 }
287
288 pub fn load_or_build(root: &Path) -> Self {
289 if let Some(idx) = Self::load(root) {
290 if !vector_index_looks_stale(&idx, root) {
291 return idx;
292 }
293 tracing::warn!(
294 "[vector_index: stale index detected for {}; rebuilding]",
295 root.display()
296 );
297 let rebuilt = if idx.files.is_empty() {
298 Self::build_from_directory(root)
299 } else {
300 Self::rebuild_incremental(root, &idx)
301 };
302 let _ = rebuilt.save(root);
303 return rebuilt;
304 }
305
306 let built = Self::build_from_directory(root);
307 let _ = built.save(root);
308 built
309 }
310
311 pub fn index_file_path(root: &Path) -> PathBuf {
312 index_dir(root).join("bm25_index.json")
313 }
314}
315
316fn vector_index_looks_stale(index: &BM25Index, root: &Path) -> bool {
317 if index.chunks.is_empty() {
318 return false;
319 }
320
321 if index.files.is_empty() {
322 let mut seen = std::collections::HashSet::<&str>::new();
324 for chunk in &index.chunks {
325 let rel = chunk.file_path.trim_start_matches(['/', '\\']);
326 if rel.is_empty() {
327 continue;
328 }
329 if !seen.insert(rel) {
330 continue;
331 }
332 if !root.join(rel).exists() {
333 return true;
334 }
335 }
336 return false;
337 }
338
339 for (rel, old_state) in &index.files {
341 let abs = root.join(rel);
342 if !abs.exists() {
343 return true;
344 }
345 let Some(cur) = IndexedFileState::from_path(&abs) else {
346 return true;
347 };
348 if &cur != old_state {
349 return true;
350 }
351 }
352
353 for rel in list_code_files(root) {
355 if !index.files.contains_key(&rel) {
356 return true;
357 }
358 }
359
360 false
361}
362
/// Directory in which the index for `root` is persisted, as resolved by
/// `crate::core::index_namespace::vectors_dir`.
fn index_dir(root: &Path) -> PathBuf {
    crate::core::index_namespace::vectors_dir(root)
}
366
367fn list_code_files(root: &Path) -> Vec<String> {
368 let walker = ignore::WalkBuilder::new(root)
369 .hidden(true)
370 .git_ignore(true)
371 .git_global(true)
372 .git_exclude(true)
373 .build();
374
375 let mut files: Vec<String> = Vec::new();
376 for entry in walker.flatten() {
377 let path = entry.path();
378 if !path.is_file() {
379 continue;
380 }
381 if !is_code_file(path) {
382 continue;
383 }
384 let rel = path
385 .strip_prefix(root)
386 .unwrap_or(path)
387 .to_string_lossy()
388 .to_string();
389 if rel.is_empty() {
390 continue;
391 }
392 files.push(rel);
393 }
394
395 files.sort();
396 files.dedup();
397 files
398}
399
/// Returns `true` when the extension of `path`, compared case-insensitively, is one of
/// the source-code extensions this index handles.
///
/// Paths without an extension, or with an extension that is not valid UTF-8, are rejected.
pub fn is_code_file(path: &Path) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cc", "cpp", "h", "hpp", "rb",
        "cs", "kt", "swift", "php", "scala", "sql", "ex", "exs", "zig", "lua", "dart", "vue",
        "svelte",
    ];

    let Some(ext) = path.extension().and_then(|e| e.to_str()) else {
        return false;
    };
    let ext = ext.to_lowercase();
    CODE_EXTENSIONS.contains(&ext.as_str())
}
436
437fn tokenize(text: &str) -> Vec<String> {
438 let mut tokens = Vec::new();
439 let mut current = String::new();
440
441 for ch in text.chars() {
442 if ch.is_alphanumeric() || ch == '_' {
443 current.push(ch);
444 } else {
445 if current.len() >= 2 {
446 tokens.push(current.clone());
447 }
448 current.clear();
449 }
450 }
451 if current.len() >= 2 {
452 tokens.push(current);
453 }
454
455 split_camel_case_tokens(&tokens)
456}
457
/// Crate-visible entry point to the tokenizer used by this index; forwards to `tokenize`.
pub(crate) fn tokenize_for_index(text: &str) -> Vec<String> {
    tokenize(text)
}
461
/// Expands each token with its camel-case parts.
///
/// Every input token is kept as-is and followed by its parts when it contains at least
/// one word boundary. A boundary lies before an uppercase character that either
///
/// * follows a non-uppercase character (`parse|HTTP`, `calculate|Total`), or
/// * is followed by a lowercase character, i.e. it starts a new word after an acronym
///   (`HTTP|Server`).
///
/// Parts shorter than two bytes are dropped. Tokens without a boundary (`ABC`,
/// `snake_case`) contribute only themselves.
fn split_camel_case_tokens(tokens: &[String]) -> Vec<String> {
    /// Appends `part` to `out` unless it is shorter than two bytes.
    fn push_part(out: &mut Vec<String>, part: &[char]) {
        let part: String = part.iter().collect();
        if part.len() >= 2 {
            out.push(part);
        }
    }

    let mut result = Vec::with_capacity(tokens.len());
    for token in tokens {
        result.push(token.clone());

        let chars: Vec<char> = token.chars().collect();
        let mut start = 0;
        for i in 1..chars.len() {
            if !chars[i].is_uppercase() {
                continue;
            }
            let after_non_upper = !chars[i - 1].is_uppercase();
            let before_lower = chars.get(i + 1).is_some_and(|next| next.is_lowercase());
            if after_non_upper || before_lower {
                push_part(&mut result, &chars[start..i]);
                start = i;
            }
        }
        // Only emit the tail when the token was actually split.
        if start > 0 {
            push_part(&mut result, &chars[start..]);
        }
    }
    result
}
486
487fn extract_chunks(file_path: &str, content: &str) -> Vec<CodeChunk> {
488 #[cfg(feature = "tree-sitter")]
489 {
490 let ext = std::path::Path::new(file_path)
491 .extension()
492 .and_then(|e| e.to_str())
493 .unwrap_or("");
494 if let Some(chunks) = crate::core::chunks_ts::extract_chunks_ts(file_path, content, ext) {
495 return chunks;
496 }
497 }
498
499 let lines: Vec<&str> = content.lines().collect();
500 if lines.is_empty() {
501 return Vec::new();
502 }
503
504 let mut chunks = Vec::new();
505 let mut i = 0;
506
507 while i < lines.len() {
508 let trimmed = lines[i].trim();
509
510 if let Some((name, kind)) = detect_symbol(trimmed) {
511 let start = i;
512 let end = find_block_end(&lines, i);
513 let block: String = lines[start..=end.min(lines.len() - 1)].to_vec().join("\n");
514 let tokens = tokenize(&block);
515 let token_count = tokens.len();
516
517 chunks.push(CodeChunk {
518 file_path: file_path.to_string(),
519 symbol_name: name,
520 kind,
521 start_line: start + 1,
522 end_line: end + 1,
523 content: block,
524 tokens,
525 token_count,
526 });
527
528 i = end + 1;
529 } else {
530 i += 1;
531 }
532 }
533
534 if chunks.is_empty() && !content.is_empty() {
535 let bytes = content.as_bytes();
540 let rk_chunks = crate::core::rabin_karp::chunk(content);
541 if !rk_chunks.is_empty() && rk_chunks.len() <= 200 {
542 for (idx, c) in rk_chunks.into_iter().take(50).enumerate() {
543 let end = (c.offset + c.length).min(bytes.len());
544 let slice = &bytes[c.offset..end];
545 let chunk_text = String::from_utf8_lossy(slice).into_owned();
546 let tokens = tokenize(&chunk_text);
547 let token_count = tokens.len();
548 let start_line = 1 + bytecount::count(&bytes[..c.offset], b'\n');
549 let end_line = start_line + bytecount::count(slice, b'\n');
550 chunks.push(CodeChunk {
551 file_path: file_path.to_string(),
552 symbol_name: format!("{file_path}#chunk-{idx}"),
553 kind: ChunkKind::Module,
554 start_line,
555 end_line: end_line.max(start_line),
556 content: chunk_text,
557 tokens,
558 token_count,
559 });
560 }
561 } else {
562 let tokens = tokenize(content);
563 let token_count = tokens.len();
564 let snippet = lines
565 .iter()
566 .take(50)
567 .copied()
568 .collect::<Vec<_>>()
569 .join("\n");
570 chunks.push(CodeChunk {
571 file_path: file_path.to_string(),
572 symbol_name: file_path.to_string(),
573 kind: ChunkKind::Module,
574 start_line: 1,
575 end_line: lines.len(),
576 content: snippet,
577 tokens,
578 token_count,
579 });
580 }
581 }
582
583 chunks
584}
585
586fn detect_symbol(line: &str) -> Option<(String, ChunkKind)> {
587 let trimmed = line.trim();
588
589 let patterns: &[(&str, ChunkKind)] = &[
590 ("pub async fn ", ChunkKind::Function),
591 ("async fn ", ChunkKind::Function),
592 ("pub fn ", ChunkKind::Function),
593 ("fn ", ChunkKind::Function),
594 ("pub struct ", ChunkKind::Struct),
595 ("struct ", ChunkKind::Struct),
596 ("pub enum ", ChunkKind::Struct),
597 ("enum ", ChunkKind::Struct),
598 ("impl ", ChunkKind::Impl),
599 ("pub trait ", ChunkKind::Struct),
600 ("trait ", ChunkKind::Struct),
601 ("export function ", ChunkKind::Function),
602 ("export async function ", ChunkKind::Function),
603 ("export default function ", ChunkKind::Function),
604 ("function ", ChunkKind::Function),
605 ("async function ", ChunkKind::Function),
606 ("export class ", ChunkKind::Class),
607 ("class ", ChunkKind::Class),
608 ("export interface ", ChunkKind::Struct),
609 ("interface ", ChunkKind::Struct),
610 ("def ", ChunkKind::Function),
611 ("async def ", ChunkKind::Function),
612 ("class ", ChunkKind::Class),
613 ("func ", ChunkKind::Function),
614 ];
615
616 for (prefix, kind) in patterns {
617 if let Some(rest) = trimmed.strip_prefix(prefix) {
618 let name: String = rest
619 .chars()
620 .take_while(|c| c.is_alphanumeric() || *c == '_' || *c == '<')
621 .take_while(|c| *c != '<')
622 .collect();
623 if !name.is_empty() {
624 return Some((name, kind.clone()));
625 }
626 }
627 }
628
629 None
630}
631
/// Number of leading whitespace characters of `line`.
fn leading_indent_width(line: &str) -> usize {
    line.chars().take_while(|c| c.is_whitespace()).count()
}

/// Returns the index of the last line of the block whose header is `lines[start]`.
///
/// This is a line-based heuristic; it does not understand strings or comments.
///
/// * **Brace blocks** – the block ends on the line where the first `{` opened at
///   parenthesis depth zero is closed again. Parentheses are only tracked while the
///   signature is still open, so a parameter list (or a `{}` default inside it) no
///   longer terminates the block on the signature line.
/// * **Body-less declarations** – a line ending in `;` before any block was opened
///   (`struct Unit;`, a trait method, a prototype) ends the block on that line.
/// * **Indentation blocks** – a header ending in `:` is delimited purely by
///   indentation: the block ends at the last non-blank line before the first
///   non-blank line indented no deeper than the header. Other headers that never open
///   a brace use the same rule, but only after the signature is complete and two
///   lines of grace (so a `where` clause or a brace on its own line is not cut off).
///
/// When a brace block is never closed, or the signature never completes, the result
/// falls back to `start + 50`, clamped to the last line. The result is never smaller
/// than `start` for an in-range `start`.
fn find_block_end(lines: &[&str], start: usize) -> usize {
    let fallback = (start + 50).min(lines.len().saturating_sub(1));
    let Some(header) = lines.get(start) else {
        return fallback;
    };

    let header_indent = leading_indent_width(header);
    let colon_header = header.trim_end().ends_with(':');

    let mut brace_depth = 0usize;
    let mut paren_depth = 0usize;
    let mut body_open = false;
    // Line on which the (possibly multi-line) signature closed its parentheses.
    let mut signature_end: Option<usize> = None;
    // Last non-blank line seen so far; trailing blank lines are not part of a block.
    let mut last_content = start;

    for (i, line) in lines.iter().enumerate().skip(start) {
        if !colon_header {
            for ch in line.chars() {
                match ch {
                    '(' if !body_open => paren_depth += 1,
                    ')' if !body_open => paren_depth = paren_depth.saturating_sub(1),
                    '{' if body_open || paren_depth == 0 => {
                        brace_depth += 1;
                        body_open = true;
                    }
                    '}' if body_open => {
                        brace_depth = brace_depth.saturating_sub(1);
                        if brace_depth == 0 {
                            return i;
                        }
                    }
                    _ => {}
                }
            }

            if body_open {
                continue;
            }
            if paren_depth == 0 && line.trim_end().ends_with(';') {
                return i;
            }
        }

        let is_blank = line.trim().is_empty();
        let past_header = if colon_header {
            i > start
        } else {
            i > start + 2 && signature_end.is_some_and(|end| i > end)
        };
        if past_header && !is_blank && leading_indent_width(line) <= header_indent {
            return last_content;
        }

        if signature_end.is_none() && paren_depth == 0 {
            signature_end = Some(i);
        }
        if !is_blank {
            last_content = i;
        }
    }

    if body_open || paren_depth > 0 {
        fallback
    } else {
        // Everything up to the end of the input belonged to the block.
        last_content
    }
}
669
670pub fn format_search_results(results: &[SearchResult], compact: bool) -> String {
671 if results.is_empty() {
672 return "No results found.".to_string();
673 }
674
675 let mut out = String::new();
676 for (i, r) in results.iter().enumerate() {
677 if compact {
678 out.push_str(&format!(
679 "{}. {:.2} {}:{}-{} {:?} {}\n",
680 i + 1,
681 r.score,
682 r.file_path,
683 r.start_line,
684 r.end_line,
685 r.kind,
686 r.symbol_name,
687 ));
688 } else {
689 out.push_str(&format!(
690 "\n--- Result {} (score: {:.2}) ---\n{} :: {} [{:?}] (L{}-{})\n{}\n",
691 i + 1,
692 r.score,
693 r.file_path,
694 r.symbol_name,
695 r.kind,
696 r.start_line,
697 r.end_line,
698 r.snippet,
699 ));
700 }
701 }
702 out
703}
704
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    #[cfg(unix)]
    use std::os::unix::fs::PermissionsExt;

    // The tokenizer keeps snake_case identifiers whole and splits on punctuation.
    #[test]
    fn tokenize_splits_code() {
        let tokens = tokenize("fn calculate_total(items: Vec<Item>) -> f64");
        assert!(tokens.contains(&"calculate_total".to_string()));
        assert!(tokens.contains(&"items".to_string()));
        assert!(tokens.contains(&"Vec".to_string()));
    }

    // Camel-case expansion keeps the original token and adds each part.
    #[test]
    fn camel_case_splitting() {
        let tokens = split_camel_case_tokens(&["calculateTotal".to_string()]);
        assert!(tokens.contains(&"calculateTotal".to_string()));
        assert!(tokens.contains(&"calculate".to_string()));
        assert!(tokens.contains(&"Total".to_string()));
    }

    // The symbol name stops at the opening parenthesis of the argument list.
    #[test]
    fn detect_rust_function() {
        let (name, kind) =
            detect_symbol("pub fn process_request(req: Request) -> Response {").unwrap();
        assert_eq!(name, "process_request");
        assert_eq!(kind, ChunkKind::Function);
    }

    // The chunk whose tokens overlap the query must rank first.
    #[test]
    fn bm25_search_finds_relevant() {
        let mut index = BM25Index::new();
        index.add_chunk(CodeChunk {
            file_path: "auth.rs".into(),
            symbol_name: "validate_token".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 10,
            content: "fn validate_token(token: &str) -> bool { check_jwt_expiry(token) }".into(),
            tokens: tokenize("fn validate_token token str bool check_jwt_expiry token"),
            token_count: 8,
        });
        index.add_chunk(CodeChunk {
            file_path: "db.rs".into(),
            symbol_name: "connect_database".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 5,
            content: "fn connect_database(url: &str) -> Pool { create_pool(url) }".into(),
            tokens: tokenize("fn connect_database url str Pool create_pool url"),
            token_count: 7,
        });
        index.finalize();

        let results = index.search("jwt token validation", 5);
        assert!(!results.is_empty());
        assert_eq!(results[0].symbol_name, "validate_token");
    }

    // Two chunks with identical scores must come back ordered by file path.
    #[test]
    fn bm25_search_sorts_ties_deterministically() {
        let mut index = BM25Index::new();

        // Inserted in reverse path order so that insertion order cannot satisfy the assertions.
        index.add_chunk(CodeChunk {
            file_path: "b.rs".into(),
            symbol_name: "same".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 1,
            content: "fn same() {}".into(),
            tokens: tokenize("same token"),
            token_count: 2,
        });
        index.add_chunk(CodeChunk {
            file_path: "a.rs".into(),
            symbol_name: "same".into(),
            kind: ChunkKind::Function,
            start_line: 1,
            end_line: 1,
            content: "fn same() {}".into(),
            tokens: tokenize("same token"),
            token_count: 2,
        });
        index.finalize();

        let results = index.search("same", 10);
        assert!(results.len() >= 2);
        assert_eq!(results[0].file_path, "a.rs");
        assert_eq!(results[1].file_path, "b.rs");
    }

    // Deleting a tracked file must make a previously fresh index stale.
    #[test]
    fn vector_index_is_stale_when_any_indexed_file_is_missing() {
        let td = tempdir().expect("tempdir");
        let root = td.path();
        std::fs::write(root.join("a.rs"), "pub fn a() {}\n").expect("write a.rs");

        let idx = BM25Index::build_from_directory(root);
        assert!(!vector_index_looks_stale(&idx, root));

        std::fs::remove_file(root.join("a.rs")).expect("remove a.rs");
        assert!(vector_index_looks_stale(&idx, root));
    }

    // An unchanged file must be carried over from the previous index without being read,
    // while a modified file must be re-read and re-chunked.
    #[test]
    #[cfg(unix)]
    fn bm25_incremental_rebuild_reuses_unchanged_files_without_reading() {
        let td = tempdir().expect("tempdir");
        let root = td.path();

        std::fs::write(root.join("a.rs"), "pub fn a() { println!(\"A\"); }\n").expect("write a.rs");
        std::fs::write(root.join("b.rs"), "pub fn b() { println!(\"B\"); }\n").expect("write b.rs");

        let idx1 = BM25Index::build_from_directory(root);
        assert!(idx1.files.contains_key("a.rs"));
        assert!(idx1.files.contains_key("b.rs"));

        let a_path = root.join("a.rs");
        // Remove all permissions from a.rs: metadata stays readable, the contents do not,
        // so a.rs can only remain in the index if its old chunks are reused.
        let mut perms = std::fs::metadata(&a_path).expect("meta a.rs").permissions();
        perms.set_mode(0o000);
        std::fs::set_permissions(&a_path, perms).expect("chmod a.rs");

        // Change b.rs (different size) so that it must be re-indexed.
        std::fs::write(root.join("b.rs"), "pub fn b() { println!(\"B2\"); }\n")
            .expect("rewrite b.rs");

        let idx2 = BM25Index::rebuild_incremental(root, &idx1);
        assert!(
            idx2.files.contains_key("a.rs"),
            "a.rs should be kept via reuse"
        );
        assert!(idx2.files.contains_key("b.rs"));

        let b_has_b2 = idx2
            .chunks
            .iter()
            .any(|c| c.file_path == "b.rs" && c.content.contains("B2"));
        assert!(b_has_b2, "b.rs should be re-read and re-chunked");

        // Restore permissions (best effort) before the temporary directory is dropped.
        let mut perms = std::fs::metadata(&a_path).expect("meta a.rs").permissions();
        perms.set_mode(0o644);
        let _ = std::fs::set_permissions(&a_path, perms);
    }
}