Skip to main content

codelens_engine/symbols/
writer.rs

1use super::SymbolIndex;
2use super::parser::{flatten_symbols, parse_symbols};
3use super::types::{AnalyzedFile, IndexStats, ParsedSymbol};
4use super::{collect_candidate_files, file_modified_ms, language_for_path};
5use crate::db::{self, NewCall, NewImport, NewSymbol, content_hash};
6use crate::import_graph::{extract_imports_from_source, resolve_module_for_file};
7use crate::project::ProjectRoot;
8use anyhow::{Context, Result};
9use std::collections::HashSet;
10use std::fs;
11use std::path::{Path, PathBuf};
12
13fn should_bulk_rebuild_symbol_index(before: &IndexStats, candidate_count: usize) -> bool {
14    before.indexed_files > candidate_count.saturating_add(512)
15        && before.stale_files > candidate_count.saturating_div(2).max(256)
16}
17
18/// Analyze a single file: read, hash, parse symbols/imports/calls.
19/// Returns None if the file cannot be read or has no supported language.
20fn analyze_file(project: &ProjectRoot, file: &Path) -> Option<AnalyzedFile> {
21    let relative = project.to_relative(file);
22    let content = fs::read(file).ok()?;
23    let mtime = file_modified_ms(file).ok()? as i64;
24    let hash = content_hash(&content);
25    let source = String::from_utf8_lossy(&content);
26    // Mirror `language_for_path`: extension-less well-known files
27    // (Makefile, Dockerfile, Containerfile) key by lowercased file name.
28    // The old `file.extension()?` dropped them here even though candidate
29    // collection had already accepted them via `language_for_path`.
30    let ext = match file.extension().and_then(|e| e.to_str()) {
31        Some(e) => e.to_ascii_lowercase(),
32        None => file.file_name()?.to_str()?.to_ascii_lowercase(),
33    };
34
35    let symbols = language_for_path(file)
36        .and_then(|config| parse_symbols(&config, &relative, &source, false).ok())
37        .unwrap_or_default();
38
39    let raw_imports = extract_imports_from_source(file, &source);
40    let imports: Vec<NewImport> = raw_imports
41        .iter()
42        .filter_map(|raw| {
43            resolve_module_for_file(project, file, raw).map(|target| NewImport {
44                target_path: target,
45                raw_import: raw.clone(),
46            })
47        })
48        .collect();
49
50    let calls: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
51        .into_iter()
52        .map(|e| NewCall {
53            caller_name: e.caller_name,
54            callee_name: e.callee_name,
55            line: e.line as i64,
56        })
57        .collect();
58
59    Some(AnalyzedFile {
60        relative_path: relative,
61        mtime,
62        content_hash: hash,
63        size_bytes: content.len() as i64,
64        language_ext: ext,
65        symbols,
66        imports,
67        calls,
68    })
69}
70
71/// Commit an AnalyzedFile to the DB within an existing connection/transaction.
72/// Skips if the file is already fresh (same hash+mtime).
73/// Returns true if the file was actually written.
74fn commit_analyzed(conn: &rusqlite::Connection, analyzed: &AnalyzedFile) -> Result<bool> {
75    if db::get_fresh_file(
76        conn,
77        &analyzed.relative_path,
78        analyzed.mtime,
79        &analyzed.content_hash,
80    )?
81    .is_some()
82    {
83        return Ok(false);
84    }
85
86    let file_id = db::upsert_file(
87        conn,
88        &analyzed.relative_path,
89        analyzed.mtime,
90        &analyzed.content_hash,
91        analyzed.size_bytes,
92        Some(&analyzed.language_ext),
93    )?;
94
95    let flat = flatten_symbols(analyzed.symbols.clone());
96    let new_syms: Vec<NewSymbol<'_>> = flat
97        .iter()
98        .map(|s| NewSymbol {
99            name: &s.name,
100            kind: s.kind.as_label(),
101            line: s.line as i64,
102            column_num: s.column as i64,
103            start_byte: s.start_byte as i64,
104            end_byte: s.end_byte as i64,
105            signature: &s.signature,
106            name_path: &s.name_path,
107            parent_id: None,
108        })
109        .collect();
110    db::insert_symbols(conn, file_id, &new_syms)?;
111
112    if !analyzed.imports.is_empty() {
113        db::insert_imports(conn, file_id, &analyzed.imports)?;
114    }
115    if !analyzed.calls.is_empty() {
116        db::insert_calls(conn, file_id, &analyzed.calls)?;
117    }
118
119    Ok(true)
120}
121
122impl SymbolIndex {
123    /// One-time migration from legacy symbols-v1.json to SQLite.
124    pub(super) fn migrate_from_json(&mut self) -> Result<()> {
125        let json_path = self
126            .project
127            .as_path()
128            .join(".codelens/index/symbols-v1.json");
129        if !json_path.is_file() {
130            return Ok(());
131        }
132        let stats = self.refresh_all()?;
133        if stats.indexed_files > 0 || stats.stale_files == 0 {
134            let _ = fs::remove_file(&json_path);
135        } else {
136            tracing::warn!(
137                path = %json_path.display(),
138                "migration from JSON produced 0 indexed files, keeping legacy file"
139            );
140        }
141        Ok(())
142    }
143
144    pub fn refresh_all(&self) -> Result<IndexStats> {
145        use rayon::prelude::*;
146
147        let mut files = collect_candidate_files(self.project.as_path())?;
148        let before_stats = self.stats().ok();
149        let bulk_rebuild = before_stats
150            .as_ref()
151            .is_some_and(|before| should_bulk_rebuild_symbol_index(before, files.len()));
152        files.sort_by(|a, b| {
153            let sa = a.metadata().map(|m| m.len()).unwrap_or(0);
154            let sb = b.metadata().map(|m| m.len()).unwrap_or(0);
155            sb.cmp(&sa)
156        });
157
158        // Phase 1: parallel analysis (CPU-bound, no DB access)
159        let project = &self.project;
160        let analyzed: Vec<AnalyzedFile> = files
161            .par_iter()
162            .filter_map(|file| analyze_file(project, file))
163            .collect();
164
165        // Phase 2: sequential DB commit
166        self.writer().with_transaction(|conn| {
167            if bulk_rebuild {
168                db::clear_symbol_index(conn)?;
169            }
170
171            let mut on_disk = HashSet::new();
172            for af in &analyzed {
173                on_disk.insert(af.relative_path.clone());
174                commit_analyzed(conn, af)?;
175            }
176
177            if !bulk_rebuild {
178                // Remove files that no longer exist on disk.
179                for indexed_path in db::all_file_paths(conn)? {
180                    if !on_disk.contains(&indexed_path) {
181                        db::delete_file(conn, &indexed_path)?;
182                    }
183                }
184            }
185
186            Ok(())
187        })?;
188        if let Err(error) = self.checkpoint_wal_passive() {
189            tracing::debug!(
190                path = %self.db_path.display(),
191                error = %error,
192                "symbol index WAL checkpoint skipped after refresh"
193            );
194        }
195        self.stats()
196    }
197
198    /// Incrementally re-index only the given files (changed/created).
199    pub fn index_files(&self, paths: &[PathBuf]) -> Result<usize> {
200        use rayon::prelude::*;
201
202        let project = &self.project;
203        let analyzed: Vec<AnalyzedFile> = paths
204            .par_iter()
205            .filter(|f| f.is_file())
206            .filter_map(|file| analyze_file(project, file))
207            .collect();
208
209        let count = analyzed.len();
210        if count == 0 {
211            return Ok(0);
212        }
213
214        self.writer().with_transaction(|conn| {
215            for af in &analyzed {
216                commit_analyzed(conn, af)?;
217            }
218            Ok(())
219        })?;
220        Ok(count)
221    }
222
223    /// Re-index a single file by relative path (convenience for post-mutation refresh).
224    pub fn refresh_file(&self, relative_path: &str) -> Result<usize> {
225        let abs = self.project.as_path().join(relative_path);
226        self.index_files(&[abs])
227    }
228
229    /// Remove deleted files from the index.
230    pub fn remove_files(&self, paths: &[PathBuf]) -> Result<usize> {
231        let count = paths.len();
232        let relatives: Vec<String> = paths.iter().map(|p| self.project.to_relative(p)).collect();
233        self.writer().with_transaction(|conn| {
234            for relative in &relatives {
235                db::delete_file(conn, relative)?;
236            }
237            Ok(())
238        })?;
239        Ok(count)
240    }
241
242    /// Ensure a file is indexed; returns parsed symbols for immediate use.
243    /// Fast path: if mtime unchanged, reads symbols from DB (no re-parse).
244    pub(super) fn ensure_indexed(&self, file: &Path, relative: &str) -> Result<Vec<ParsedSymbol>> {
245        let mtime = file_modified_ms(file)? as i64;
246        let db = self.writer();
247
248        // Fast path: mtime unchanged → read symbols from DB instead of re-parsing
249        if let Some(file_row) = db.get_fresh_file_by_mtime(relative, mtime)? {
250            let db_symbols = db.get_file_symbols(file_row.id)?;
251            return Ok(db_symbols
252                .into_iter()
253                .map(|row| ParsedSymbol {
254                    name: row.name,
255                    kind: super::types::SymbolKind::from_str_label(&row.kind),
256                    file_path: relative.to_owned(),
257                    line: row.line as usize,
258                    column: row.column_num as usize,
259                    start_byte: row.start_byte as u32,
260                    end_byte: row.end_byte as u32,
261                    signature: row.signature,
262                    body: None,
263                    name_path: row.name_path,
264                    children: Vec::new(),
265                })
266                .collect());
267        }
268
269        // Slow path: analyze and commit
270        let content =
271            fs::read(file).with_context(|| format!("failed to read {}", file.display()))?;
272        let hash = content_hash(&content);
273        let source = String::from_utf8_lossy(&content);
274        let symbols = if let Some(config) = language_for_path(file) {
275            parse_symbols(&config, relative, &source, false)?
276        } else {
277            Vec::new()
278        };
279
280        // Same filename-key fallback as `analyze_file`, gated on the language
281        // registry so arbitrary extension-less files (LICENSE, …) don't leak
282        // their names into the `language` column.
283        let ext = file
284            .extension()
285            .and_then(|e| e.to_str())
286            .map(|e| e.to_ascii_lowercase())
287            .or_else(|| {
288                language_for_path(file)?;
289                file.file_name()
290                    .and_then(|n| n.to_str())
291                    .map(|n| n.to_ascii_lowercase())
292            });
293
294        let file_id =
295            db.upsert_file(relative, mtime, &hash, content.len() as i64, ext.as_deref())?;
296
297        let flat = flatten_symbols(symbols.clone());
298        let new_syms: Vec<NewSymbol<'_>> = flat
299            .iter()
300            .map(|s| NewSymbol {
301                name: &s.name,
302                kind: s.kind.as_label(),
303                line: s.line as i64,
304                column_num: s.column as i64,
305                start_byte: s.start_byte as i64,
306                end_byte: s.end_byte as i64,
307                signature: &s.signature,
308                name_path: &s.name_path,
309                parent_id: None,
310            })
311            .collect();
312        db.insert_symbols(file_id, &new_syms)?;
313
314        let raw_imports = extract_imports_from_source(file, &source);
315        let new_imports: Vec<NewImport> = raw_imports
316            .iter()
317            .filter_map(|raw| {
318                resolve_module_for_file(&self.project, file, raw).map(|target| NewImport {
319                    target_path: target,
320                    raw_import: raw.clone(),
321                })
322            })
323            .collect();
324        if !new_imports.is_empty() {
325            db.insert_imports(file_id, &new_imports)?;
326        }
327
328        let call_edges: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
329            .into_iter()
330            .map(|e| NewCall {
331                caller_name: e.caller_name,
332                callee_name: e.callee_name,
333                line: e.line as i64,
334            })
335            .collect();
336        if !call_edges.is_empty() {
337            db.insert_calls(file_id, &call_edges)?;
338        }
339
340        Ok(symbols)
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// An index far larger than the candidate set, with most entries stale,
    /// must request a bulk rebuild.
    #[test]
    fn bulk_rebuild_triggers_for_large_stale_overhang() {
        let candidate_count = 1_978;
        let decision = should_bulk_rebuild_symbol_index(
            &IndexStats {
                indexed_files: 3_854,
                supported_files: 2_021,
                stale_files: 3_147,
            },
            candidate_count,
        );

        assert!(decision);
    }

    /// An index that matches the candidate set with a few stale entries must
    /// stay on the incremental path.
    #[test]
    fn bulk_rebuild_does_not_trigger_for_normal_stale_refresh() {
        let candidate_count = 1_978;
        let decision = should_bulk_rebuild_symbol_index(
            &IndexStats {
                indexed_files: 1_978,
                supported_files: 1_978,
                stale_files: 40,
            },
            candidate_count,
        );

        assert!(!decision);
    }
}
369}