//! codelens_engine/symbols/writer.rs — write path of the symbol index:
//! analyzes files and commits symbols/imports/calls to SQLite.
1use super::SymbolIndex;
2use super::parser::{flatten_symbols, parse_symbols};
3use super::types::{AnalyzedFile, IndexStats, ParsedSymbol};
4use super::{collect_candidate_files, file_modified_ms, language_for_path};
5use crate::db::{self, NewCall, NewImport, NewSymbol, content_hash};
6use crate::import_graph::{extract_imports_from_source, resolve_module_for_file};
7use crate::project::ProjectRoot;
8use anyhow::{Context, Result};
9use std::collections::HashSet;
10use std::fs;
11use std::path::{Path, PathBuf};
12
13/// Analyze a single file: read, hash, parse symbols/imports/calls.
14/// Returns None if the file cannot be read or has no supported language.
15fn analyze_file(project: &ProjectRoot, file: &Path) -> Option<AnalyzedFile> {
16    let relative = project.to_relative(file);
17    let content = fs::read(file).ok()?;
18    let mtime = file_modified_ms(file).ok()? as i64;
19    let hash = content_hash(&content);
20    let source = String::from_utf8_lossy(&content);
21    let ext = file.extension()?.to_str()?.to_ascii_lowercase();
22
23    let symbols = language_for_path(file)
24        .and_then(|config| parse_symbols(&config, &relative, &source, false).ok())
25        .unwrap_or_default();
26
27    let raw_imports = extract_imports_from_source(file, &source);
28    let imports: Vec<NewImport> = raw_imports
29        .iter()
30        .filter_map(|raw| {
31            resolve_module_for_file(project, file, raw).map(|target| NewImport {
32                target_path: target,
33                raw_import: raw.clone(),
34            })
35        })
36        .collect();
37
38    let calls: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
39        .into_iter()
40        .map(|e| NewCall {
41            caller_name: e.caller_name,
42            callee_name: e.callee_name,
43            line: e.line as i64,
44        })
45        .collect();
46
47    Some(AnalyzedFile {
48        relative_path: relative,
49        mtime,
50        content_hash: hash,
51        size_bytes: content.len() as i64,
52        language_ext: ext,
53        symbols,
54        imports,
55        calls,
56    })
57}
58
59/// Commit an AnalyzedFile to the DB within an existing connection/transaction.
60/// Skips if the file is already fresh (same hash+mtime).
61/// Returns true if the file was actually written.
62fn commit_analyzed(conn: &rusqlite::Connection, analyzed: &AnalyzedFile) -> Result<bool> {
63    if db::get_fresh_file(
64        conn,
65        &analyzed.relative_path,
66        analyzed.mtime,
67        &analyzed.content_hash,
68    )?
69    .is_some()
70    {
71        return Ok(false);
72    }
73
74    let file_id = db::upsert_file(
75        conn,
76        &analyzed.relative_path,
77        analyzed.mtime,
78        &analyzed.content_hash,
79        analyzed.size_bytes,
80        Some(&analyzed.language_ext),
81    )?;
82
83    let flat = flatten_symbols(analyzed.symbols.clone());
84    let new_syms: Vec<NewSymbol<'_>> = flat
85        .iter()
86        .map(|s| NewSymbol {
87            name: &s.name,
88            kind: s.kind.as_label(),
89            line: s.line as i64,
90            column_num: s.column as i64,
91            start_byte: s.start_byte as i64,
92            end_byte: s.end_byte as i64,
93            signature: &s.signature,
94            name_path: &s.name_path,
95            parent_id: None,
96        })
97        .collect();
98    db::insert_symbols(conn, file_id, &new_syms)?;
99
100    if !analyzed.imports.is_empty() {
101        db::insert_imports(conn, file_id, &analyzed.imports)?;
102    }
103    if !analyzed.calls.is_empty() {
104        db::insert_calls(conn, file_id, &analyzed.calls)?;
105    }
106
107    Ok(true)
108}
109
impl SymbolIndex {
    /// One-time migration from legacy symbols-v1.json to SQLite.
    ///
    /// Runs a full refresh, then deletes the legacy JSON file only when the
    /// refresh appears to have succeeded, so a failed migration can be
    /// retried on a later run instead of losing the old index.
    pub(super) fn migrate_from_json(&mut self) -> Result<()> {
        let json_path = self
            .project
            .as_path()
            .join(".codelens/index/symbols-v1.json");
        if !json_path.is_file() {
            // No legacy index — nothing to migrate.
            return Ok(());
        }
        let stats = self.refresh_all()?;
        // Success criteria: we indexed at least one file, or there was
        // nothing stale to index at all (e.g. an empty project).
        if stats.indexed_files > 0 || stats.stale_files == 0 {
            // Best-effort removal; failure to delete the JSON is not fatal.
            let _ = fs::remove_file(&json_path);
        } else {
            tracing::warn!(
                path = %json_path.display(),
                "migration from JSON produced 0 indexed files, keeping legacy file"
            );
        }
        Ok(())
    }

    /// Full re-index of every candidate file in the project.
    ///
    /// Files are analyzed in parallel (rayon), then committed sequentially
    /// inside a single transaction; files that no longer exist on disk are
    /// pruned in the same transaction. Returns post-refresh index stats.
    pub fn refresh_all(&self) -> Result<IndexStats> {
        use rayon::prelude::*;

        let mut files = collect_candidate_files(self.project.as_path())?;
        // Largest files first — presumably so the most expensive parses are
        // scheduled early for better parallel load balancing (TODO confirm).
        // Unreadable metadata sorts as size 0 (last).
        files.sort_by(|a, b| {
            let sa = a.metadata().map(|m| m.len()).unwrap_or(0);
            let sb = b.metadata().map(|m| m.len()).unwrap_or(0);
            sb.cmp(&sa)
        });

        // Phase 1: parallel analysis (CPU-bound, no DB access)
        // Unreadable or extension-less files are silently dropped here.
        let project = &self.project;
        let analyzed: Vec<AnalyzedFile> = files
            .par_iter()
            .filter_map(|file| analyze_file(project, file))
            .collect();

        // Phase 2: sequential DB commit
        self.writer().with_transaction(|conn| {
            let mut on_disk = HashSet::new();
            for af in &analyzed {
                on_disk.insert(af.relative_path.clone());
                // commit_analyzed skips files whose hash+mtime are unchanged.
                commit_analyzed(conn, af)?;
            }

            // Remove files that no longer exist on disk
            for indexed_path in db::all_file_paths(conn)? {
                if !on_disk.contains(&indexed_path) {
                    db::delete_file(conn, &indexed_path)?;
                }
            }

            Ok(())
        })?;
        self.stats()
    }

    /// Incrementally re-index only the given files (changed/created).
    ///
    /// Non-file paths are skipped. Returns the number of files successfully
    /// analyzed — unchanged-but-fresh files still count, since the per-file
    /// freshness check happens inside commit_analyzed and its bool result is
    /// ignored here.
    pub fn index_files(&self, paths: &[PathBuf]) -> Result<usize> {
        use rayon::prelude::*;

        let project = &self.project;
        let analyzed: Vec<AnalyzedFile> = paths
            .par_iter()
            .filter(|f| f.is_file())
            .filter_map(|file| analyze_file(project, file))
            .collect();

        let count = analyzed.len();
        if count == 0 {
            // Avoid opening a transaction when there is nothing to write.
            return Ok(0);
        }

        self.writer().with_transaction(|conn| {
            for af in &analyzed {
                commit_analyzed(conn, af)?;
            }
            Ok(())
        })?;
        Ok(count)
    }

    /// Re-index a single file by relative path (convenience for post-mutation refresh).
    pub fn refresh_file(&self, relative_path: &str) -> Result<usize> {
        let abs = self.project.as_path().join(relative_path);
        self.index_files(&[abs])
    }

    /// Remove deleted files from the index.
    ///
    /// Returns the number of paths requested, not the number of rows that
    /// actually existed in the index.
    pub fn remove_files(&self, paths: &[PathBuf]) -> Result<usize> {
        let count = paths.len();
        let relatives: Vec<String> = paths.iter().map(|p| self.project.to_relative(p)).collect();
        self.writer().with_transaction(|conn| {
            for relative in &relatives {
                db::delete_file(conn, relative)?;
            }
            Ok(())
        })?;
        Ok(count)
    }

    /// Ensure a file is indexed; returns parsed symbols for immediate use.
    /// Fast path: if mtime unchanged, reads symbols from DB (no re-parse).
    ///
    /// NOTE(review): the fast path trusts mtime alone, while commit_analyzed
    /// checks mtime + content hash — an mtime-preserving edit would be
    /// missed here. Confirm this is an accepted trade-off.
    pub(super) fn ensure_indexed(&self, file: &Path, relative: &str) -> Result<Vec<ParsedSymbol>> {
        let mtime = file_modified_ms(file)? as i64;
        let db = self.writer();

        // Fast path: mtime unchanged → read symbols from DB instead of re-parsing
        if let Some(file_row) = db.get_fresh_file_by_mtime(relative, mtime)? {
            let db_symbols = db.get_file_symbols(file_row.id)?;
            // Rehydrate ParsedSymbol from rows; body and children are not
            // stored in the DB, so they come back as None / empty.
            return Ok(db_symbols
                .into_iter()
                .map(|row| ParsedSymbol {
                    name: row.name,
                    kind: super::types::SymbolKind::from_str_label(&row.kind),
                    file_path: relative.to_owned(),
                    line: row.line as usize,
                    column: row.column_num as usize,
                    start_byte: row.start_byte as u32,
                    end_byte: row.end_byte as u32,
                    signature: row.signature,
                    body: None,
                    name_path: row.name_path,
                    children: Vec::new(),
                })
                .collect());
        }

        // Slow path: analyze and commit
        // NOTE(review): assumes upsert_file / insert_symbols replace any rows
        // previously stored for this file — confirm in the db module.
        let content =
            fs::read(file).with_context(|| format!("failed to read {}", file.display()))?;
        let hash = content_hash(&content);
        let source = String::from_utf8_lossy(&content);
        let symbols = if let Some(config) = language_for_path(file) {
            parse_symbols(&config, relative, &source, false)?
        } else {
            Vec::new()
        };

        // Unlike analyze_file, the extension is optional here.
        let ext = file
            .extension()
            .and_then(|e| e.to_str())
            .map(|e| e.to_ascii_lowercase());

        let file_id =
            db.upsert_file(relative, mtime, &hash, content.len() as i64, ext.as_deref())?;

        let flat = flatten_symbols(symbols.clone());
        let new_syms: Vec<NewSymbol<'_>> = flat
            .iter()
            .map(|s| NewSymbol {
                name: &s.name,
                kind: s.kind.as_label(),
                line: s.line as i64,
                column_num: s.column as i64,
                start_byte: s.start_byte as i64,
                end_byte: s.end_byte as i64,
                signature: &s.signature,
                name_path: &s.name_path,
                parent_id: None,
            })
            .collect();
        db.insert_symbols(file_id, &new_syms)?;

        let raw_imports = extract_imports_from_source(file, &source);
        let new_imports: Vec<NewImport> = raw_imports
            .iter()
            .filter_map(|raw| {
                resolve_module_for_file(&self.project, file, raw).map(|target| NewImport {
                    target_path: target,
                    raw_import: raw.clone(),
                })
            })
            .collect();
        if !new_imports.is_empty() {
            db.insert_imports(file_id, &new_imports)?;
        }

        let call_edges: Vec<NewCall> = crate::call_graph::extract_calls_from_source(file, &source)
            .into_iter()
            .map(|e| NewCall {
                caller_name: e.caller_name,
                callee_name: e.callee_name,
                line: e.line as i64,
            })
            .collect();
        if !call_edges.is_empty() {
            db.insert_calls(file_id, &call_edges)?;
        }

        Ok(symbols)
    }
}