Skip to main content

codelens_engine/symbols/
writer.rs

1use super::parser::{flatten_symbols, parse_symbols};
2use super::types::{AnalyzedFile, IndexStats, ParsedSymbol};
3use super::SymbolIndex;
4use super::{collect_candidate_files, file_modified_ms, language_for_path};
5use crate::db::{self, content_hash, NewCall, NewImport, NewSymbol};
6use crate::import_graph::{extract_imports_from_source, resolve_module_for_file};
7use crate::project::ProjectRoot;
8use anyhow::{Context, Result};
9use std::collections::HashSet;
10use std::fs;
11use std::path::{Path, PathBuf};
12
13/// Analyze a single file: read, hash, parse symbols/imports/calls.
14/// Returns None if the file cannot be read or has no supported language.
15fn analyze_file(project: &ProjectRoot, file: &Path) -> Option<AnalyzedFile> {
16    let relative = project.to_relative(file);
17    let content = fs::read(file).ok()?;
18    let mtime = file_modified_ms(file).ok()? as i64;
19    let hash = content_hash(&content);
20    let source = String::from_utf8_lossy(&content);
21    let ext = file.extension()?.to_str()?.to_ascii_lowercase();
22
23    let symbols = language_for_path(file)
24        .and_then(|config| parse_symbols(&config, &relative, &source, false).ok())
25        .unwrap_or_default();
26
27    let raw_imports = extract_imports_from_source(file, &source);
28    let imports: Vec<NewImport> = raw_imports
29        .iter()
30        .filter_map(|raw| {
31            resolve_module_for_file(project, file, raw).map(|target| NewImport {
32                target_path: target,
33                raw_import: raw.clone(),
34            })
35        })
36        .collect();
37
38    let calls: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
39        .into_iter()
40        .map(|e| NewCall {
41            caller_name: e.caller_name,
42            callee_name: e.callee_name,
43            line: e.line as i64,
44        })
45        .collect();
46
47    Some(AnalyzedFile {
48        relative_path: relative,
49        mtime,
50        content_hash: hash,
51        size_bytes: content.len() as i64,
52        language_ext: ext,
53        symbols,
54        imports,
55        calls,
56    })
57}
58
59/// Commit an AnalyzedFile to the DB within an existing connection/transaction.
60/// Skips if the file is already fresh (same hash+mtime).
61/// Returns true if the file was actually written.
62fn commit_analyzed(conn: &rusqlite::Connection, analyzed: &AnalyzedFile) -> Result<bool> {
63    if db::get_fresh_file(
64        conn,
65        &analyzed.relative_path,
66        analyzed.mtime,
67        &analyzed.content_hash,
68    )?
69    .is_some()
70    {
71        return Ok(false);
72    }
73
74    let file_id = db::upsert_file(
75        conn,
76        &analyzed.relative_path,
77        analyzed.mtime,
78        &analyzed.content_hash,
79        analyzed.size_bytes,
80        Some(&analyzed.language_ext),
81    )?;
82
83    let flat = flatten_symbols(analyzed.symbols.clone());
84    let new_syms: Vec<NewSymbol<'_>> = flat
85        .iter()
86        .map(|s| NewSymbol {
87            name: &s.name,
88            kind: s.kind.as_label(),
89            line: s.line as i64,
90            column_num: s.column as i64,
91            start_byte: s.start_byte as i64,
92            end_byte: s.end_byte as i64,
93            signature: &s.signature,
94            name_path: &s.name_path,
95            parent_id: None,
96            end_line: s.end_line as i64,
97        })
98        .collect();
99    db::insert_symbols(conn, file_id, &new_syms)?;
100
101    if !analyzed.imports.is_empty() {
102        db::insert_imports(conn, file_id, &analyzed.imports)?;
103    }
104    if !analyzed.calls.is_empty() {
105        db::insert_calls(conn, file_id, &analyzed.calls)?;
106    }
107
108    Ok(true)
109}
110
impl SymbolIndex {
    /// One-time migration from legacy symbols-v1.json to SQLite.
    ///
    /// Runs a full refresh and, if it indexed at least one file (or
    /// everything was already fresh), deletes the legacy JSON file.
    /// Removal is best-effort — a failed delete is silently ignored.
    /// If the refresh produced nothing while stale files remain, the
    /// JSON is kept and a warning is logged.
    pub(super) fn migrate_from_json(&mut self) -> Result<()> {
        let json_path = self
            .project
            .as_path()
            .join(".codelens/index/symbols-v1.json");
        // Nothing to migrate when the legacy file is absent.
        if !json_path.is_file() {
            return Ok(());
        }
        let stats = self.refresh_all()?;
        // `stale_files == 0` with nothing indexed means the DB was
        // already current, which also counts as a complete migration.
        if stats.indexed_files > 0 || stats.stale_files == 0 {
            let _ = fs::remove_file(&json_path);
        } else {
            tracing::warn!(
                path = %json_path.display(),
                "migration from JSON produced 0 indexed files, keeping legacy file"
            );
        }
        Ok(())
    }

    /// Re-index every candidate file in the project and prune DB rows
    /// for files that no longer exist on disk. Returns fresh stats.
    ///
    /// Analysis runs in parallel with rayon; all DB writes happen
    /// afterwards in a single sequential transaction.
    pub fn refresh_all(&self) -> Result<IndexStats> {
        use rayon::prelude::*;

        let mut files = collect_candidate_files(self.project.as_path())?;
        // Largest files first: with work stealing this avoids a huge
        // file landing last and serializing the tail of phase 1.
        // Metadata errors sort the file as size 0 (last).
        files.sort_by(|a, b| {
            let sa = a.metadata().map(|m| m.len()).unwrap_or(0);
            let sb = b.metadata().map(|m| m.len()).unwrap_or(0);
            sb.cmp(&sa)
        });

        // Phase 1: parallel analysis (CPU-bound, no DB access).
        // Files that fail analysis (unreadable, no extension) are
        // silently dropped by filter_map.
        let project = &self.project;
        let analyzed: Vec<AnalyzedFile> = files
            .par_iter()
            .filter_map(|file| analyze_file(project, file))
            .collect();

        // Phase 2: sequential DB commit
        self.writer().with_transaction(|conn| {
            let mut on_disk = HashSet::new();
            for af in &analyzed {
                on_disk.insert(af.relative_path.clone());
                commit_analyzed(conn, af)?;
            }

            // Remove files that no longer exist on disk
            for indexed_path in db::all_file_paths(conn)? {
                if !on_disk.contains(&indexed_path) {
                    db::delete_file(conn, &indexed_path)?;
                }
            }

            Ok(())
        })?;
        self.stats()
    }

    /// Incrementally re-index only the given files (changed/created).
    ///
    /// Paths that are not regular files are skipped. Returns the
    /// number of files successfully analyzed — note this counts files
    /// that `commit_analyzed` then skipped as already fresh, too.
    pub fn index_files(&self, paths: &[PathBuf]) -> Result<usize> {
        use rayon::prelude::*;

        let project = &self.project;
        let analyzed: Vec<AnalyzedFile> = paths
            .par_iter()
            .filter(|f| f.is_file())
            .filter_map(|file| analyze_file(project, file))
            .collect();

        let count = analyzed.len();
        // Avoid opening a transaction when there is nothing to write.
        if count == 0 {
            return Ok(0);
        }

        self.writer().with_transaction(|conn| {
            for af in &analyzed {
                commit_analyzed(conn, af)?;
            }
            Ok(())
        })?;
        Ok(count)
    }

    /// Re-index a single file by relative path (convenience for post-mutation refresh).
    pub fn refresh_file(&self, relative_path: &str) -> Result<usize> {
        let abs = self.project.as_path().join(relative_path);
        self.index_files(&[abs])
    }

    /// Remove deleted files from the index.
    ///
    /// NOTE(review): the returned count is simply `paths.len()` — it
    /// does not reflect whether each path was actually present in the
    /// index before deletion.
    pub fn remove_files(&self, paths: &[PathBuf]) -> Result<usize> {
        let count = paths.len();
        let relatives: Vec<String> = paths.iter().map(|p| self.project.to_relative(p)).collect();
        self.writer().with_transaction(|conn| {
            for relative in &relatives {
                db::delete_file(conn, relative)?;
            }
            Ok(())
        })?;
        Ok(count)
    }

    /// Ensure a file is indexed; returns parsed symbols for immediate use.
    /// Fast path: if mtime unchanged, reads symbols from DB (no re-parse).
    ///
    /// NOTE(review): the slow path below duplicates the logic of
    /// `analyze_file` + `commit_analyzed` (using the writer's method
    /// API instead of a transaction/connection) — keep the three in
    /// sync when the schema changes.
    pub(super) fn ensure_indexed(&self, file: &Path, relative: &str) -> Result<Vec<ParsedSymbol>> {
        let mtime = file_modified_ms(file)? as i64;
        let db = self.writer();

        // Fast path: mtime unchanged → read symbols from DB instead of re-parsing
        if let Some(file_row) = db.get_fresh_file_by_mtime(relative, mtime)? {
            let db_symbols = db.get_file_symbols(file_row.id)?;
            return Ok(db_symbols
                .into_iter()
                .map(|row| {
                    let row_line = row.line as usize;
                    let row_end_line = if row.end_line > 0 {
                        row.end_line as usize
                    } else {
                        row_line
                    };
                    // DB rows are a flat list, so the rebuilt symbols
                    // carry no body text and no children — callers of
                    // the fast path get a flattened view.
                    ParsedSymbol {
                        name: row.name,
                        kind: super::types::SymbolKind::from_str_label(&row.kind),
                        file_path: relative.to_owned(),
                        line: row_line,
                        column: row.column_num as usize,
                        start_byte: row.start_byte as u32,
                        end_byte: row.end_byte as u32,
                        // Migration 7 persists end_line. Pre-migration
                        // rows read back as 0 — we fall back to `line`
                        // so the P1-4 proximity factor keeps working
                        // on legacy caches.
                        end_line: row_end_line,
                        signature: row.signature,
                        body: None,
                        name_path: row.name_path,
                        children: Vec::new(),
                    }
                })
                .collect());
        }

        // Slow path: analyze and commit
        let content =
            fs::read(file).with_context(|| format!("failed to read {}", file.display()))?;
        let hash = content_hash(&content);
        let source = String::from_utf8_lossy(&content);
        // Unlike the batch path, a parse failure here propagates as an
        // error (`?`) instead of degrading to an empty symbol list.
        let symbols = if let Some(config) = language_for_path(file) {
            parse_symbols(&config, relative, &source, false)?
        } else {
            Vec::new()
        };

        // A missing/non-UTF-8 extension is tolerated here (stored as
        // NULL), whereas `analyze_file` skips such files entirely.
        let ext = file
            .extension()
            .and_then(|e| e.to_str())
            .map(|e| e.to_ascii_lowercase());

        let file_id =
            db.upsert_file(relative, mtime, &hash, content.len() as i64, ext.as_deref())?;

        // Flatten the symbol tree into DB rows; parent links are not
        // persisted (parent_id is always None).
        let flat = flatten_symbols(symbols.clone());
        let new_syms: Vec<NewSymbol<'_>> = flat
            .iter()
            .map(|s| NewSymbol {
                name: &s.name,
                kind: s.kind.as_label(),
                line: s.line as i64,
                column_num: s.column as i64,
                start_byte: s.start_byte as i64,
                end_byte: s.end_byte as i64,
                signature: &s.signature,
                name_path: &s.name_path,
                parent_id: None,
                end_line: s.end_line as i64,
            })
            .collect();
        db.insert_symbols(file_id, &new_syms)?;

        // Store only imports that resolve to a file inside the project.
        let raw_imports = extract_imports_from_source(file, &source);
        let new_imports: Vec<NewImport> = raw_imports
            .iter()
            .filter_map(|raw| {
                resolve_module_for_file(&self.project, file, raw).map(|target| NewImport {
                    target_path: target,
                    raw_import: raw.clone(),
                })
            })
            .collect();
        if !new_imports.is_empty() {
            db.insert_imports(file_id, &new_imports)?;
        }

        let call_edges: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
            .into_iter()
            .map(|e| NewCall {
                caller_name: e.caller_name,
                callee_name: e.callee_name,
                line: e.line as i64,
            })
            .collect();
        if !call_edges.is_empty() {
            db.insert_calls(file_id, &call_edges)?;
        }

        // Return the freshly parsed (nested) symbols, not the flat rows.
        Ok(symbols)
    }
}