1use super::SymbolIndex;
2use super::parser::{flatten_symbols, parse_symbols};
3use super::types::{AnalyzedFile, IndexStats, ParsedSymbol};
4use super::{collect_candidate_files, file_modified_ms, language_for_path};
5use crate::db::{self, NewCall, NewImport, NewSymbol, content_hash};
6use crate::import_graph::{extract_imports_from_source, resolve_module_for_file};
7use crate::project::ProjectRoot;
8use anyhow::{Context, Result};
9use std::collections::HashSet;
10use std::fs;
11use std::path::{Path, PathBuf};
12
13fn should_bulk_rebuild_symbol_index(before: &IndexStats, candidate_count: usize) -> bool {
14 before.indexed_files > candidate_count.saturating_add(512)
15 && before.stale_files > candidate_count.saturating_div(2).max(256)
16}
17
18fn analyze_file(project: &ProjectRoot, file: &Path) -> Option<AnalyzedFile> {
21 let relative = project.to_relative(file);
22 let content = fs::read(file).ok()?;
23 let mtime = file_modified_ms(file).ok()? as i64;
24 let hash = content_hash(&content);
25 let source = String::from_utf8_lossy(&content);
26 let ext = match file.extension().and_then(|e| e.to_str()) {
31 Some(e) => e.to_ascii_lowercase(),
32 None => file.file_name()?.to_str()?.to_ascii_lowercase(),
33 };
34
35 let symbols = language_for_path(file)
36 .and_then(|config| parse_symbols(&config, &relative, &source, false).ok())
37 .unwrap_or_default();
38
39 let raw_imports = extract_imports_from_source(file, &source);
40 let imports: Vec<NewImport> = raw_imports
41 .iter()
42 .filter_map(|raw| {
43 resolve_module_for_file(project, file, raw).map(|target| NewImport {
44 target_path: target,
45 raw_import: raw.clone(),
46 })
47 })
48 .collect();
49
50 let calls: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
51 .into_iter()
52 .map(|e| NewCall {
53 caller_name: e.caller_name,
54 callee_name: e.callee_name,
55 line: e.line as i64,
56 })
57 .collect();
58
59 Some(AnalyzedFile {
60 relative_path: relative,
61 mtime,
62 content_hash: hash,
63 size_bytes: content.len() as i64,
64 language_ext: ext,
65 symbols,
66 imports,
67 calls,
68 })
69}
70
71fn commit_analyzed(conn: &rusqlite::Connection, analyzed: &AnalyzedFile) -> Result<bool> {
75 if db::get_fresh_file(
76 conn,
77 &analyzed.relative_path,
78 analyzed.mtime,
79 &analyzed.content_hash,
80 )?
81 .is_some()
82 {
83 return Ok(false);
84 }
85
86 let file_id = db::upsert_file(
87 conn,
88 &analyzed.relative_path,
89 analyzed.mtime,
90 &analyzed.content_hash,
91 analyzed.size_bytes,
92 Some(&analyzed.language_ext),
93 )?;
94
95 let flat = flatten_symbols(analyzed.symbols.clone());
96 let new_syms: Vec<NewSymbol<'_>> = flat
97 .iter()
98 .map(|s| NewSymbol {
99 name: &s.name,
100 kind: s.kind.as_label(),
101 line: s.line as i64,
102 column_num: s.column as i64,
103 start_byte: s.start_byte as i64,
104 end_byte: s.end_byte as i64,
105 signature: &s.signature,
106 name_path: &s.name_path,
107 parent_id: None,
108 })
109 .collect();
110 db::insert_symbols(conn, file_id, &new_syms)?;
111
112 if !analyzed.imports.is_empty() {
113 db::insert_imports(conn, file_id, &analyzed.imports)?;
114 }
115 if !analyzed.calls.is_empty() {
116 db::insert_calls(conn, file_id, &analyzed.calls)?;
117 }
118
119 Ok(true)
120}
121
122impl SymbolIndex {
123 pub(super) fn migrate_from_json(&mut self) -> Result<()> {
125 let json_path = self
126 .project
127 .as_path()
128 .join(".codelens/index/symbols-v1.json");
129 if !json_path.is_file() {
130 return Ok(());
131 }
132 let stats = self.refresh_all()?;
133 if stats.indexed_files > 0 || stats.stale_files == 0 {
134 let _ = fs::remove_file(&json_path);
135 } else {
136 tracing::warn!(
137 path = %json_path.display(),
138 "migration from JSON produced 0 indexed files, keeping legacy file"
139 );
140 }
141 Ok(())
142 }
143
144 pub fn refresh_all(&self) -> Result<IndexStats> {
145 use rayon::prelude::*;
146
147 let mut files = collect_candidate_files(self.project.as_path())?;
148 let before_stats = self.stats().ok();
149 let bulk_rebuild = before_stats
150 .as_ref()
151 .is_some_and(|before| should_bulk_rebuild_symbol_index(before, files.len()));
152 files.sort_by(|a, b| {
153 let sa = a.metadata().map(|m| m.len()).unwrap_or(0);
154 let sb = b.metadata().map(|m| m.len()).unwrap_or(0);
155 sb.cmp(&sa)
156 });
157
158 let project = &self.project;
160 let analyzed: Vec<AnalyzedFile> = files
161 .par_iter()
162 .filter_map(|file| analyze_file(project, file))
163 .collect();
164
165 self.writer().with_transaction(|conn| {
167 if bulk_rebuild {
168 db::clear_symbol_index(conn)?;
169 }
170
171 let mut on_disk = HashSet::new();
172 for af in &analyzed {
173 on_disk.insert(af.relative_path.clone());
174 commit_analyzed(conn, af)?;
175 }
176
177 if !bulk_rebuild {
178 for indexed_path in db::all_file_paths(conn)? {
180 if !on_disk.contains(&indexed_path) {
181 db::delete_file(conn, &indexed_path)?;
182 }
183 }
184 }
185
186 Ok(())
187 })?;
188 if let Err(error) = self.checkpoint_wal_passive() {
189 tracing::debug!(
190 path = %self.db_path.display(),
191 error = %error,
192 "symbol index WAL checkpoint skipped after refresh"
193 );
194 }
195 self.stats()
196 }
197
198 pub fn index_files(&self, paths: &[PathBuf]) -> Result<usize> {
200 use rayon::prelude::*;
201
202 let project = &self.project;
203 let analyzed: Vec<AnalyzedFile> = paths
204 .par_iter()
205 .filter(|f| f.is_file())
206 .filter_map(|file| analyze_file(project, file))
207 .collect();
208
209 let count = analyzed.len();
210 if count == 0 {
211 return Ok(0);
212 }
213
214 self.writer().with_transaction(|conn| {
215 for af in &analyzed {
216 commit_analyzed(conn, af)?;
217 }
218 Ok(())
219 })?;
220 Ok(count)
221 }
222
223 pub fn refresh_file(&self, relative_path: &str) -> Result<usize> {
225 let abs = self.project.as_path().join(relative_path);
226 self.index_files(&[abs])
227 }
228
229 pub fn remove_files(&self, paths: &[PathBuf]) -> Result<usize> {
231 let count = paths.len();
232 let relatives: Vec<String> = paths.iter().map(|p| self.project.to_relative(p)).collect();
233 self.writer().with_transaction(|conn| {
234 for relative in &relatives {
235 db::delete_file(conn, relative)?;
236 }
237 Ok(())
238 })?;
239 Ok(count)
240 }
241
242 pub(super) fn ensure_indexed(&self, file: &Path, relative: &str) -> Result<Vec<ParsedSymbol>> {
245 let mtime = file_modified_ms(file)? as i64;
246 let db = self.writer();
247
248 if let Some(file_row) = db.get_fresh_file_by_mtime(relative, mtime)? {
250 let db_symbols = db.get_file_symbols(file_row.id)?;
251 return Ok(db_symbols
252 .into_iter()
253 .map(|row| ParsedSymbol {
254 name: row.name,
255 kind: super::types::SymbolKind::from_str_label(&row.kind),
256 file_path: relative.to_owned(),
257 line: row.line as usize,
258 column: row.column_num as usize,
259 start_byte: row.start_byte as u32,
260 end_byte: row.end_byte as u32,
261 signature: row.signature,
262 body: None,
263 name_path: row.name_path,
264 children: Vec::new(),
265 })
266 .collect());
267 }
268
269 let content =
271 fs::read(file).with_context(|| format!("failed to read {}", file.display()))?;
272 let hash = content_hash(&content);
273 let source = String::from_utf8_lossy(&content);
274 let symbols = if let Some(config) = language_for_path(file) {
275 parse_symbols(&config, relative, &source, false)?
276 } else {
277 Vec::new()
278 };
279
280 let ext = file
284 .extension()
285 .and_then(|e| e.to_str())
286 .map(|e| e.to_ascii_lowercase())
287 .or_else(|| {
288 language_for_path(file)?;
289 file.file_name()
290 .and_then(|n| n.to_str())
291 .map(|n| n.to_ascii_lowercase())
292 });
293
294 let file_id =
295 db.upsert_file(relative, mtime, &hash, content.len() as i64, ext.as_deref())?;
296
297 let flat = flatten_symbols(symbols.clone());
298 let new_syms: Vec<NewSymbol<'_>> = flat
299 .iter()
300 .map(|s| NewSymbol {
301 name: &s.name,
302 kind: s.kind.as_label(),
303 line: s.line as i64,
304 column_num: s.column as i64,
305 start_byte: s.start_byte as i64,
306 end_byte: s.end_byte as i64,
307 signature: &s.signature,
308 name_path: &s.name_path,
309 parent_id: None,
310 })
311 .collect();
312 db.insert_symbols(file_id, &new_syms)?;
313
314 let raw_imports = extract_imports_from_source(file, &source);
315 let new_imports: Vec<NewImport> = raw_imports
316 .iter()
317 .filter_map(|raw| {
318 resolve_module_for_file(&self.project, file, raw).map(|target| NewImport {
319 target_path: target,
320 raw_import: raw.clone(),
321 })
322 })
323 .collect();
324 if !new_imports.is_empty() {
325 db.insert_imports(file_id, &new_imports)?;
326 }
327
328 let call_edges: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
329 .into_iter()
330 .map(|e| NewCall {
331 caller_name: e.caller_name,
332 callee_name: e.callee_name,
333 line: e.line as i64,
334 })
335 .collect();
336 if !call_edges.is_empty() {
337 db.insert_calls(file_id, &call_edges)?;
338 }
339
340 Ok(symbols)
341 }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// An index holding far more rows than there are candidate files, most of
    /// them stale, must request a bulk rebuild.
    #[test]
    fn bulk_rebuild_triggers_for_large_stale_overhang() {
        let candidate_count = 1_978;
        let bloated = IndexStats {
            indexed_files: 3_854,
            supported_files: 2_021,
            stale_files: 3_147,
        };

        let rebuild = should_bulk_rebuild_symbol_index(&bloated, candidate_count);
        assert!(rebuild);
    }

    /// An index that matches the candidate count with only a few stale files
    /// must stay on the incremental path.
    #[test]
    fn bulk_rebuild_does_not_trigger_for_normal_stale_refresh() {
        let candidate_count = 1_978;
        let healthy = IndexStats {
            indexed_files: 1_978,
            supported_files: 1_978,
            stale_files: 40,
        };

        let rebuild = should_bulk_rebuild_symbol_index(&healthy, candidate_count);
        assert!(!rebuild);
    }
}