Skip to main content

codelens_engine/db/
ops.rs

1use anyhow::{Context, Result};
2use rusqlite::{Connection, OptionalExtension, Row, params};
3
4use super::{DirStats, FileRow, IndexDb, NewCall, NewImport, NewSymbol, SymbolRow, SymbolWithFile};
5
6/// Build FTS5 query: split into tokens, add prefix matching (*), join with OR.
7/// e.g. "run_service" → "run" * OR "service" *
8/// e.g. "ServiceManager" → "ServiceManager" *
9fn fts5_escape(query: &str) -> String {
10    let tokens: Vec<String> = query
11        .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
12        .filter(|t| !t.is_empty())
13        .map(|token| {
14            let escaped = token.replace('"', "\"\"");
15            // FTS5 prefix query: token* matches any token starting with this string
16            format!("{escaped}*")
17        })
18        .collect();
19    if tokens.is_empty() {
20        let escaped = query.replace('"', "\"\"");
21        return format!("{escaped}*");
22    }
23    tokens.join(" OR ")
24}
25
26fn symbol_row_from_row(row: &Row<'_>) -> rusqlite::Result<SymbolRow> {
27    Ok(SymbolRow {
28        id: row.get(0)?,
29        file_id: row.get(1)?,
30        name: row.get(2)?,
31        kind: row.get(3)?,
32        line: row.get(4)?,
33        column_num: row.get(5)?,
34        start_byte: row.get(6)?,
35        end_byte: row.get(7)?,
36        signature: row.get(8)?,
37        name_path: row.get(9)?,
38        parent_id: row.get(10)?,
39    })
40}
41
42// ---- Transaction-compatible free functions ----
43// These accept &Connection so they work with both Connection and Transaction (via Deref).
44
45/// Returns the file row if it exists and is fresh (same mtime and hash).
46pub(crate) fn get_fresh_file(
47    conn: &Connection,
48    relative_path: &str,
49    mtime_ms: i64,
50    content_hash: &str,
51) -> Result<Option<FileRow>> {
52    conn.query_row(
53        "SELECT id, relative_path, mtime_ms, content_hash, size_bytes, language
54         FROM files WHERE relative_path = ?1 AND mtime_ms = ?2 AND content_hash = ?3",
55        params![relative_path, mtime_ms, content_hash],
56        |row| {
57            Ok(FileRow {
58                id: row.get(0)?,
59                relative_path: row.get(1)?,
60                mtime_ms: row.get(2)?,
61                content_hash: row.get(3)?,
62                size_bytes: row.get(4)?,
63                language: row.get(5)?,
64            })
65        },
66    )
67    .optional()
68    .context("get_fresh_file query failed")
69}
70
71/// Upsert a file record. Returns the file id. Deletes old symbols/imports on update.
72pub(crate) fn upsert_file(
73    conn: &Connection,
74    relative_path: &str,
75    mtime_ms: i64,
76    content_hash: &str,
77    size_bytes: i64,
78    language: Option<&str>,
79) -> Result<i64> {
80    let now = std::time::SystemTime::now()
81        .duration_since(std::time::UNIX_EPOCH)
82        .unwrap_or_default()
83        .as_millis() as i64;
84
85    let id: i64 = conn.query_row(
86        "INSERT INTO files (relative_path, mtime_ms, content_hash, size_bytes, language, indexed_at)
87         VALUES (?1, ?2, ?3, ?4, ?5, ?6)
88         ON CONFLICT(relative_path) DO UPDATE SET
89            mtime_ms = excluded.mtime_ms,
90            content_hash = excluded.content_hash,
91            size_bytes = excluded.size_bytes,
92            language = excluded.language,
93            indexed_at = excluded.indexed_at
94         RETURNING id",
95        params![relative_path, mtime_ms, content_hash, size_bytes, language, now],
96        |row| row.get(0),
97    )?;
98
99    conn.execute("DELETE FROM symbols WHERE file_id = ?1", params![id])?;
100    conn.execute("DELETE FROM imports WHERE source_file_id = ?1", params![id])?;
101    conn.execute("DELETE FROM calls WHERE caller_file_id = ?1", params![id])?;
102
103    Ok(id)
104}
105
106/// Delete a file and its associated symbols/imports.
107pub(crate) fn delete_file(conn: &Connection, relative_path: &str) -> Result<()> {
108    conn.execute(
109        "DELETE FROM files WHERE relative_path = ?1",
110        params![relative_path],
111    )?;
112    Ok(())
113}
114
115/// Clear all symbol-index content in bulk.
116///
117/// Used when an index has drifted so far from the current file tree that
118/// per-file cascade deletes are slower than rebuilding the current candidate
119/// set from scratch.
120pub(crate) fn clear_symbol_index(conn: &Connection) -> Result<()> {
121    conn.execute_batch(
122        "DELETE FROM symbols;
123         DELETE FROM imports;
124         DELETE FROM calls;
125         DELETE FROM files;",
126    )?;
127    Ok(())
128}
129
130/// Per-directory aggregate stats.
131pub(crate) fn dir_stats(conn: &Connection) -> Result<Vec<DirStats>> {
132    // Fetch per-file symbol counts, then aggregate in Rust for accurate dir extraction
133    let mut stmt = conn.prepare_cached(
134        "SELECT f.relative_path, COUNT(s.id) AS sym_count
135         FROM files f LEFT JOIN symbols s ON s.file_id = f.id
136         GROUP BY f.id",
137    )?;
138    let rows = stmt.query_map([], |row| {
139        Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as usize))
140    })?;
141
142    let mut dir_map: std::collections::HashMap<String, (usize, usize)> =
143        std::collections::HashMap::new();
144    for row in rows {
145        let (path, sym_count) = row?;
146        let dir = match path.rfind('/') {
147            Some(pos) => &path[..=pos],
148            None => ".",
149        };
150        let entry = dir_map.entry(dir.to_owned()).or_insert((0, 0));
151        entry.0 += 1; // file count
152        entry.1 += sym_count; // symbol count
153    }
154
155    let mut result: Vec<DirStats> = dir_map
156        .into_iter()
157        .map(|(dir, (files, symbols))| DirStats {
158            dir,
159            files,
160            symbols,
161            imports_from_others: 0,
162        })
163        .collect();
164    result.sort_by_key(|b| std::cmp::Reverse(b.symbols));
165    Ok(result)
166}
167
168/// Return all indexed file paths.
169pub(crate) fn all_file_paths(conn: &Connection) -> Result<Vec<String>> {
170    let mut stmt = conn.prepare_cached("SELECT relative_path FROM files")?;
171    let rows = stmt.query_map([], |row| row.get(0))?;
172    let mut paths = Vec::new();
173    for row in rows {
174        paths.push(row?);
175    }
176    Ok(paths)
177}
178
179/// Return file paths that contain symbols of the given kinds (e.g. "class", "interface").
180pub(crate) fn files_with_symbol_kinds(conn: &Connection, kinds: &[&str]) -> Result<Vec<String>> {
181    if kinds.is_empty() {
182        return Ok(Vec::new());
183    }
184    let placeholders: String = kinds.iter().map(|_| "?").collect::<Vec<_>>().join(",");
185    let sql = format!(
186        "SELECT DISTINCT f.relative_path FROM files f \
187         JOIN symbols s ON s.file_id = f.id \
188         WHERE s.kind IN ({placeholders})"
189    );
190    let mut stmt = conn.prepare_cached(&sql)?;
191    let params: Vec<&dyn rusqlite::types::ToSql> = kinds
192        .iter()
193        .map(|k| k as &dyn rusqlite::types::ToSql)
194        .collect();
195    let rows = stmt.query_map(params.as_slice(), |row| row.get(0))?;
196    let mut paths = Vec::new();
197    for row in rows {
198        paths.push(row?);
199    }
200    Ok(paths)
201}
202
203/// Bulk insert symbols for a file. Returns the inserted symbol ids.
204pub(crate) fn insert_symbols(
205    conn: &Connection,
206    file_id: i64,
207    symbols: &[NewSymbol<'_>],
208) -> Result<Vec<i64>> {
209    let mut ids = Vec::with_capacity(symbols.len());
210    let mut stmt = conn.prepare_cached(
211        "INSERT INTO symbols (file_id, name, kind, line, column_num, start_byte, end_byte, signature, name_path, parent_id)
212         VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
213    )?;
214    for sym in symbols {
215        // #349: the `name`/`name_path` columns hold NFC — extraction
216        // already normalizes, this keeps direct ingestion paths honest.
217        let name = crate::unicode::nfc_identifier(sym.name);
218        let name_path = crate::unicode::nfc_identifier(sym.name_path);
219        stmt.execute(params![
220            file_id,
221            name.as_ref(),
222            sym.kind,
223            sym.line,
224            sym.column_num,
225            sym.start_byte,
226            sym.end_byte,
227            sym.signature,
228            name_path.as_ref(),
229            sym.parent_id,
230        ])?;
231        ids.push(conn.last_insert_rowid());
232    }
233    Ok(ids)
234}
235
236/// Bulk insert imports for a file.
237pub(crate) fn insert_imports(conn: &Connection, file_id: i64, imports: &[NewImport]) -> Result<()> {
238    let mut stmt = conn.prepare_cached(
239        "INSERT OR REPLACE INTO imports (source_file_id, target_path, raw_import)
240         VALUES (?1, ?2, ?3)",
241    )?;
242    for imp in imports {
243        stmt.execute(params![file_id, imp.target_path, imp.raw_import])?;
244    }
245    Ok(())
246}
247
248/// Bulk insert call edges for a file (clears old edges first).
249pub(crate) fn insert_calls(conn: &Connection, file_id: i64, calls: &[NewCall]) -> Result<()> {
250    conn.execute(
251        "DELETE FROM calls WHERE caller_file_id = ?1",
252        params![file_id],
253    )?;
254    let mut stmt = conn.prepare_cached(
255        "INSERT INTO calls (caller_file_id, caller_name, callee_name, line)
256         VALUES (?1, ?2, ?3, ?4)",
257    )?;
258    for call in calls {
259        // #353: same NFC contract as the symbols table (#349) — the
260        // `caller_name`/`callee_name` columns hold NFC so byte-exact
261        // lookups match the NFC queries agents type.
262        let caller_name = crate::unicode::nfc_identifier(&call.caller_name);
263        let callee_name = crate::unicode::nfc_identifier(&call.callee_name);
264        stmt.execute(params![
265            file_id,
266            caller_name.as_ref(),
267            callee_name.as_ref(),
268            call.line
269        ])?;
270    }
271    Ok(())
272}
273
274// ---- IndexDb impl: CRUD operations ----
275
276impl IndexDb {
277    // ---- File operations (delegating to free functions) ----
278
279    /// Fast mtime-only freshness check. Avoids content hashing entirely.
280    pub fn get_fresh_file_by_mtime(
281        &self,
282        relative_path: &str,
283        mtime_ms: i64,
284    ) -> Result<Option<FileRow>> {
285        self.conn
286            .query_row(
287                "SELECT id, relative_path, mtime_ms, content_hash, size_bytes, language
288                 FROM files WHERE relative_path = ?1 AND mtime_ms = ?2",
289                params![relative_path, mtime_ms],
290                |row| {
291                    Ok(FileRow {
292                        id: row.get(0)?,
293                        relative_path: row.get(1)?,
294                        mtime_ms: row.get(2)?,
295                        content_hash: row.get(3)?,
296                        size_bytes: row.get(4)?,
297                        language: row.get(5)?,
298                    })
299                },
300            )
301            .optional()
302            .context("get_fresh_file_by_mtime query failed")
303    }
304
305    /// Returns the file row if it exists and is fresh (same mtime and hash).
306    pub fn get_fresh_file(
307        &self,
308        relative_path: &str,
309        mtime_ms: i64,
310        content_hash: &str,
311    ) -> Result<Option<FileRow>> {
312        get_fresh_file(&self.conn, relative_path, mtime_ms, content_hash)
313    }
314
315    /// Returns the file row by path (regardless of freshness).
316    pub fn get_file(&self, relative_path: &str) -> Result<Option<FileRow>> {
317        self.conn
318            .query_row(
319                "SELECT id, relative_path, mtime_ms, content_hash, size_bytes, language
320                 FROM files WHERE relative_path = ?1",
321                params![relative_path],
322                |row| {
323                    Ok(FileRow {
324                        id: row.get(0)?,
325                        relative_path: row.get(1)?,
326                        mtime_ms: row.get(2)?,
327                        content_hash: row.get(3)?,
328                        size_bytes: row.get(4)?,
329                        language: row.get(5)?,
330                    })
331                },
332            )
333            .optional()
334            .context("get_file query failed")
335    }
336
337    /// Upsert a file record. Returns the file id. Deletes old symbols/imports on update.
338    pub fn upsert_file(
339        &self,
340        relative_path: &str,
341        mtime_ms: i64,
342        content_hash: &str,
343        size_bytes: i64,
344        language: Option<&str>,
345    ) -> Result<i64> {
346        upsert_file(
347            &self.conn,
348            relative_path,
349            mtime_ms,
350            content_hash,
351            size_bytes,
352            language,
353        )
354    }
355
356    /// Delete a file and its associated symbols/imports.
357    pub fn delete_file(&self, relative_path: &str) -> Result<()> {
358        delete_file(&self.conn, relative_path)
359    }
360
361    /// Count indexed files.
362    pub fn file_count(&self) -> Result<usize> {
363        let count: i64 = self
364            .conn
365            .query_row("SELECT COUNT(*) FROM files", [], |row| row.get(0))?;
366        Ok(count as usize)
367    }
368
369    /// Newest `indexed_at` epoch (seconds) across all files in the index,
370    /// or `None` when the table is empty. Callers compare against the
371    /// current wall-clock to surface a freshness hint on responses.
372    pub fn max_files_indexed_at(&self) -> Result<Option<i64>> {
373        let row: Option<i64> = self
374            .conn
375            .query_row("SELECT MAX(indexed_at) FROM files", [], |row| row.get(0))
376            .optional()?;
377        Ok(row)
378    }
379
380    /// Per-language indexed-file counts, descending. `files.language` stores
381    /// the file extension at index time (e.g. `rs`, `py`, `ts`), so callers
382    /// can map an entry straight back to an extension-keyed lookup. Rows with
383    /// a NULL language are skipped.
384    pub fn language_file_counts(&self) -> Result<Vec<(String, usize)>> {
385        let mut stmt = self.conn.prepare(
386            "SELECT language, COUNT(*) FROM files \
387             WHERE language IS NOT NULL GROUP BY language ORDER BY COUNT(*) DESC",
388        )?;
389        let rows = stmt.query_map([], |row| {
390            Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as usize))
391        })?;
392        let mut counts = Vec::new();
393        for row in rows {
394            counts.push(row?);
395        }
396        Ok(counts)
397    }
398
399    /// Oldest `indexed_at` epoch (seconds) across all files in the index,
400    /// or `None` when the table is empty.
401    pub fn min_files_indexed_at(&self) -> Result<Option<i64>> {
402        let row: Option<i64> = self
403            .conn
404            .query_row("SELECT MIN(indexed_at) FROM files", [], |row| row.get(0))
405            .optional()?;
406        Ok(row)
407    }
408
409    /// Return all indexed file paths.
410    pub fn all_file_paths(&self) -> Result<Vec<String>> {
411        all_file_paths(&self.conn)
412    }
413
414    /// Return file paths containing symbols of given kinds.
415    pub fn files_with_symbol_kinds(&self, kinds: &[&str]) -> Result<Vec<String>> {
416        files_with_symbol_kinds(&self.conn, kinds)
417    }
418
419    pub fn dir_stats(&self) -> Result<Vec<DirStats>> {
420        dir_stats(&self.conn)
421    }
422
423    // ---- Symbol operations ----
424
425    /// Bulk insert symbols for a file. Returns the inserted symbol ids.
426    pub fn insert_symbols(&self, file_id: i64, symbols: &[NewSymbol<'_>]) -> Result<Vec<i64>> {
427        insert_symbols(&self.conn, file_id, symbols)
428    }
429
430    /// Query symbols by name (exact or substring match).
431    pub fn find_symbols_by_name(
432        &self,
433        name: &str,
434        file_path: Option<&str>,
435        exact: bool,
436        max_results: usize,
437    ) -> Result<Vec<SymbolRow>> {
438        // #349: stored names are NFC (normalized at extraction) — match
439        // the query to the same form so NFD input still hits.
440        let name = crate::unicode::nfc_identifier(name);
441        let name = name.as_ref();
442        let (sql, use_file_filter) = match (exact, file_path.is_some()) {
443            (true, true) => (
444                "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num, s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id
445                 FROM symbols s JOIN files f ON s.file_id = f.id
446                 WHERE s.name = ?1 AND f.relative_path = ?2
447                 LIMIT ?3",
448                true,
449            ),
450            (true, false) => (
451                "SELECT id, file_id, name, kind, line, column_num, start_byte, end_byte, signature, name_path, parent_id
452                 FROM symbols WHERE name = ?1
453                 LIMIT ?2",
454                false,
455            ),
456            (false, true) => (
457                "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num, s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id
458                 FROM symbols s JOIN files f ON s.file_id = f.id
459                 WHERE s.name LIKE '%' || ?1 || '%' AND f.relative_path = ?2
460                 ORDER BY LENGTH(s.name), s.name
461                 LIMIT ?3",
462                true,
463            ),
464            (false, false) => (
465                "SELECT id, file_id, name, kind, line, column_num, start_byte, end_byte, signature, name_path, parent_id
466                 FROM symbols WHERE name LIKE '%' || ?1 || '%'
467                 ORDER BY LENGTH(name), name
468                 LIMIT ?2",
469                false,
470            ),
471        };
472
473        let mut stmt = self.conn.prepare_cached(sql)?;
474        let mut rows = if use_file_filter {
475            stmt.query(params![name, file_path.unwrap_or(""), max_results as i64])?
476        } else {
477            stmt.query(params![name, max_results as i64])?
478        };
479
480        let mut results = Vec::new();
481        while let Some(row) = rows.next()? {
482            results.push(symbol_row_from_row(row)?);
483        }
484        Ok(results)
485    }
486
487    /// Query symbols by exact `name_path` within one indexed file.
488    pub fn find_symbols_by_name_path(
489        &self,
490        file_path: &str,
491        name_path: &str,
492        max_results: usize,
493    ) -> Result<Vec<SymbolRow>> {
494        let name_path = crate::unicode::nfc_identifier(name_path);
495        let name_path = name_path.as_ref();
496        let mut stmt = self.conn.prepare_cached(
497            "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num, s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id
498             FROM symbols s JOIN files f ON s.file_id = f.id
499             WHERE s.name_path = ?1 AND f.relative_path = ?2
500             LIMIT ?3",
501        )?;
502        let mut rows = stmt.query(params![name_path, file_path, max_results as i64])?;
503
504        let mut results = Vec::new();
505        while let Some(row) = rows.next()? {
506            results.push(symbol_row_from_row(row)?);
507        }
508        Ok(results)
509    }
510
511    /// Query symbols by name with file path resolved via JOIN (no N+1).
512    /// Returns (SymbolRow, file_path) tuples.
513    pub fn find_symbols_with_path(
514        &self,
515        name: &str,
516        exact: bool,
517        max_results: usize,
518    ) -> Result<Vec<(SymbolRow, String)>> {
519        // #349: same NFC query normalization as `find_symbols_by_name`.
520        let name = crate::unicode::nfc_identifier(name);
521        let name = name.as_ref();
522        let sql = if exact {
523            "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num,
524                    s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id,
525                    f.relative_path
526             FROM symbols s JOIN files f ON s.file_id = f.id
527             WHERE s.name = ?1
528             LIMIT ?2"
529        } else {
530            "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num,
531                    s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id,
532                    f.relative_path
533             FROM symbols s JOIN files f ON s.file_id = f.id
534             WHERE s.name LIKE '%' || ?1 || '%'
535             LIMIT ?2"
536        };
537
538        let mut stmt = self.conn.prepare_cached(sql)?;
539        let mut rows = stmt.query(params![name, max_results as i64])?;
540        let mut results = Vec::new();
541        while let Some(row) = rows.next()? {
542            results.push((symbol_row_from_row(row)?, row.get::<_, String>(11)?));
543        }
544        Ok(results)
545    }
546
547    /// Get all symbols for a file, ordered by start_byte.
548    pub fn get_file_symbols(&self, file_id: i64) -> Result<Vec<SymbolRow>> {
549        let mut stmt = self.conn.prepare_cached(
550            "SELECT id, file_id, name, kind, line, column_num, start_byte, end_byte, signature, name_path, parent_id
551             FROM symbols WHERE file_id = ?1 ORDER BY start_byte",
552        )?;
553        let rows = stmt.query_map(params![file_id], symbol_row_from_row)?;
554        let mut results = Vec::new();
555        for row in rows {
556            results.push(row?);
557        }
558        Ok(results)
559    }
560
561    /// Full-text search symbols via FTS5 index. Returns (SymbolRow, file_path, rank).
562    /// Falls back to LIKE search if FTS5 table doesn't exist (pre-v4 DB).
563    pub fn search_symbols_fts(
564        &self,
565        query: &str,
566        max_results: usize,
567    ) -> Result<Vec<(SymbolRow, String, f64)>> {
568        // Check if FTS5 table exists
569        let fts_exists: bool = self
570            .conn
571            .query_row(
572                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='symbols_fts'",
573                [],
574                |row| row.get::<_, i64>(0),
575            )
576            .map(|c| c > 0)
577            .unwrap_or(false);
578
579        if !fts_exists {
580            // Fallback: LIKE search with JOIN
581            return self
582                .find_symbols_with_path(query, false, max_results)
583                .map(|rows| rows.into_iter().map(|(r, p)| (r, p, 0.0)).collect());
584        }
585
586        // Lazy rebuild: rebuild FTS index if stale (symbols changed since last rebuild).
587        // Uses meta keys for count freshness + timestamp cooldown (30s) to avoid
588        // expensive COUNT(*) + rebuild on every search call.
589        let now_secs = std::time::SystemTime::now()
590            .duration_since(std::time::UNIX_EPOCH)
591            .map(|d| d.as_secs() as i64)
592            .unwrap_or(0);
593        let last_rebuild_ts: i64 = self
594            .conn
595            .query_row(
596                "SELECT value FROM meta WHERE key = 'fts_rebuild_ts'",
597                [],
598                |row| row.get::<_, String>(0),
599            )
600            .optional()?
601            .and_then(|v| v.parse::<i64>().ok())
602            .unwrap_or(0);
603
604        if now_secs - last_rebuild_ts > 30 {
605            let fts_fresh: bool = self
606                .conn
607                .query_row(
608                    "SELECT value FROM meta WHERE key = 'fts_symbol_count'",
609                    [],
610                    |row| row.get::<_, String>(0),
611                )
612                .optional()?
613                .and_then(|v| v.parse::<i64>().ok())
614                .map(|cached_count| {
615                    let current: i64 = self
616                        .conn
617                        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
618                        .unwrap_or(0);
619                    cached_count == current
620                })
621                .unwrap_or(false);
622
623            if !fts_fresh {
624                let sym_count: i64 = self
625                    .conn
626                    .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
627                    .unwrap_or(0);
628                if sym_count > 0 {
629                    let _ = self
630                        .conn
631                        .execute_batch("INSERT INTO symbols_fts(symbols_fts) VALUES('rebuild')");
632                    let _ = self.conn.execute(
633                        "INSERT OR REPLACE INTO meta (key, value) VALUES ('fts_symbol_count', ?1)",
634                        params![sym_count.to_string()],
635                    );
636                }
637                let _ = self.conn.execute(
638                    "INSERT OR REPLACE INTO meta (key, value) VALUES ('fts_rebuild_ts', ?1)",
639                    params![now_secs.to_string()],
640                );
641            }
642        }
643
644        // Escape FTS5 special chars and build query
645        let fts_query = fts5_escape(query);
646        let mut stmt = self.conn.prepare_cached(
647            "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num,
648                    s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id,
649                    f.relative_path, rank
650             FROM symbols_fts
651             JOIN symbols s ON symbols_fts.rowid = s.id
652             JOIN files f ON s.file_id = f.id
653             WHERE symbols_fts MATCH ?1
654             ORDER BY rank
655             LIMIT ?2",
656        )?;
657
658        let mut rows = stmt.query(params![fts_query, max_results as i64])?;
659        let mut results = Vec::new();
660        while let Some(row) = rows.next()? {
661            results.push((
662                symbol_row_from_row(row)?,
663                row.get::<_, String>(11)?,
664                row.get::<_, f64>(12)?,
665            ));
666        }
667        Ok(results)
668    }
669
670    /// Get all symbols for files under a directory prefix in a single JOIN query.
671    /// Returns `(file_path, Vec<SymbolRow>)` grouped by file. Eliminates N+1 queries.
672    pub fn get_symbols_for_directory(&self, prefix: &str) -> Result<Vec<(String, Vec<SymbolRow>)>> {
673        let pattern = if prefix.is_empty() || prefix == "." {
674            "%".to_owned()
675        } else {
676            format!("{prefix}%")
677        };
678        let mut stmt = self.conn.prepare_cached(
679            "SELECT s.id, s.file_id, s.name, s.kind, s.line, s.column_num,
680                    s.start_byte, s.end_byte, s.signature, s.name_path, s.parent_id,
681                    f.relative_path
682             FROM symbols s
683             JOIN files f ON s.file_id = f.id
684             WHERE f.relative_path LIKE ?1
685             ORDER BY s.file_id, s.start_byte",
686        )?;
687        let rows = stmt.query_map(params![pattern], |row| {
688            Ok((symbol_row_from_row(row)?, row.get::<_, String>(11)?))
689        })?;
690
691        let mut groups: Vec<(String, Vec<SymbolRow>)> = Vec::new();
692        let mut current_path = String::new();
693        for row in rows {
694            let (sym, path) = row?;
695            if path != current_path {
696                current_path = path.clone();
697                groups.push((path, Vec::new()));
698            }
699            groups.last_mut().unwrap().1.push(sym);
700        }
701        Ok(groups)
702    }
703
704    /// Return all symbols as (name, kind, file_path, line, signature, name_path).
705    #[allow(clippy::type_complexity)]
706    pub fn all_symbol_names(&self) -> Result<Vec<(String, String, String, i64, String, String)>> {
707        let mut stmt = self.conn.prepare_cached(
708            "SELECT s.name, s.kind, f.relative_path, s.line, s.signature, s.name_path
709             FROM symbols s JOIN files f ON s.file_id = f.id",
710        )?;
711        let rows = stmt.query_map([], |row| {
712            Ok((
713                row.get::<_, String>(0)?,
714                row.get::<_, String>(1)?,
715                row.get::<_, String>(2)?,
716                row.get::<_, i64>(3)?,
717                row.get::<_, String>(4)?,
718                row.get::<_, String>(5)?,
719            ))
720        })?;
721        let mut results = Vec::new();
722        for row in rows {
723            results.push(row?);
724        }
725        Ok(results)
726    }
727
728    /// Get all symbols with byte offsets and file paths, ordered by file for batch processing.
729    pub fn all_symbols_with_bytes(&self) -> Result<Vec<SymbolWithFile>> {
730        let mut stmt = self.conn.prepare_cached(
731            "SELECT s.name, s.kind, f.relative_path, s.line, s.signature, s.name_path,
732                    s.start_byte, s.end_byte
733             FROM symbols s JOIN files f ON s.file_id = f.id
734             ORDER BY s.file_id, s.start_byte",
735        )?;
736        let rows = stmt.query_map([], |row| {
737            Ok(SymbolWithFile {
738                name: row.get(0)?,
739                kind: row.get(1)?,
740                file_path: row.get(2)?,
741                line: row.get(3)?,
742                signature: row.get(4)?,
743                name_path: row.get(5)?,
744                start_byte: row.get(6)?,
745                end_byte: row.get(7)?,
746            })
747        })?;
748        let mut results = Vec::new();
749        for row in rows {
750            results.push(row?);
751        }
752        Ok(results)
753    }
754
755    /// Stream all symbols with bytes via callback — avoids loading entire Vec into memory.
756    /// Symbols are ordered by file_path then start_byte (same as all_symbols_with_bytes).
757    pub fn for_each_symbol_with_bytes<F>(&self, mut callback: F) -> Result<usize>
758    where
759        F: FnMut(SymbolWithFile) -> Result<()>,
760    {
761        let mut stmt = self.conn.prepare_cached(
762            "SELECT s.name, s.kind, f.relative_path, s.line, s.signature, s.name_path,
763                    s.start_byte, s.end_byte
764             FROM symbols s JOIN files f ON s.file_id = f.id
765             ORDER BY s.file_id, s.start_byte",
766        )?;
767        let mut rows = stmt.query([])?;
768        let mut count = 0usize;
769        while let Some(row) = rows.next()? {
770            callback(SymbolWithFile {
771                name: row.get(0)?,
772                kind: row.get(1)?,
773                file_path: row.get(2)?,
774                line: row.get(3)?,
775                signature: row.get(4)?,
776                name_path: row.get(5)?,
777                start_byte: row.get(6)?,
778                end_byte: row.get(7)?,
779            })?;
780            count += 1;
781        }
782        Ok(count)
783    }
784
785    /// Stream symbols grouped by file path via callback — avoids loading the
786    /// entire symbol table into memory and gives deterministic file-wise order.
787    pub fn for_each_file_symbols_with_bytes<F>(&self, mut callback: F) -> Result<usize>
788    where
789        F: FnMut(String, Vec<SymbolWithFile>) -> Result<()>,
790    {
791        let mut stmt = self.conn.prepare_cached(
792            "SELECT s.name, s.kind, f.relative_path, s.line, s.signature, s.name_path,
793                    s.start_byte, s.end_byte
794             FROM symbols s JOIN files f ON s.file_id = f.id
795             ORDER BY f.relative_path, s.start_byte",
796        )?;
797        let mut rows = stmt.query([])?;
798        let mut count = 0usize;
799        let mut current_file: Option<String> = None;
800        let mut current_symbols: Vec<SymbolWithFile> = Vec::new();
801
802        while let Some(row) = rows.next()? {
803            let symbol = SymbolWithFile {
804                name: row.get(0)?,
805                kind: row.get(1)?,
806                file_path: row.get(2)?,
807                line: row.get(3)?,
808                signature: row.get(4)?,
809                name_path: row.get(5)?,
810                start_byte: row.get(6)?,
811                end_byte: row.get(7)?,
812            };
813
814            if current_file.as_deref() != Some(symbol.file_path.as_str())
815                && let Some(previous_file) = current_file.replace(symbol.file_path.clone())
816            {
817                callback(previous_file, std::mem::take(&mut current_symbols))?;
818            }
819
820            current_symbols.push(symbol);
821            count += 1;
822        }
823
824        if let Some(file_path) = current_file {
825            callback(file_path, current_symbols)?;
826        }
827
828        Ok(count)
829    }
830
831    /// Get symbols with bytes for specific files only (for incremental embedding).
832    pub fn symbols_for_files(&self, file_paths: &[&str]) -> Result<Vec<SymbolWithFile>> {
833        if file_paths.is_empty() {
834            return Ok(Vec::new());
835        }
836        let placeholders: Vec<String> = (1..=file_paths.len()).map(|i| format!("?{i}")).collect();
837        let sql = format!(
838            "SELECT s.name, s.kind, f.relative_path, s.line, s.signature, s.name_path,
839                    s.start_byte, s.end_byte
840             FROM symbols s JOIN files f ON s.file_id = f.id
841             WHERE f.relative_path IN ({})
842             ORDER BY s.file_id, s.start_byte",
843            placeholders.join(", ")
844        );
845        let mut stmt = self.conn.prepare(&sql)?;
846        let params: Vec<&dyn rusqlite::types::ToSql> = file_paths
847            .iter()
848            .map(|p| p as &dyn rusqlite::types::ToSql)
849            .collect();
850        let rows = stmt.query_map(params.as_slice(), |row| {
851            Ok(SymbolWithFile {
852                name: row.get(0)?,
853                kind: row.get(1)?,
854                file_path: row.get(2)?,
855                line: row.get(3)?,
856                signature: row.get(4)?,
857                name_path: row.get(5)?,
858                start_byte: row.get(6)?,
859                end_byte: row.get(7)?,
860            })
861        })?;
862        let mut results = Vec::new();
863        for row in rows {
864            results.push(row?);
865        }
866        Ok(results)
867    }
868
869    /// Get file path for a file_id.
870    pub fn get_file_path(&self, file_id: i64) -> Result<Option<String>> {
871        self.conn
872            .query_row(
873                "SELECT relative_path FROM files WHERE id = ?1",
874                params![file_id],
875                |row| row.get(0),
876            )
877            .optional()
878            .context("get_file_path query failed")
879    }
880
881    // ---- Import operations ----
882
883    /// Bulk insert imports for a file.
884    pub fn insert_imports(&self, file_id: i64, imports: &[NewImport]) -> Result<()> {
885        insert_imports(&self.conn, file_id, imports)
886    }
887
888    /// Get files that import the given file path (reverse dependencies).
889    pub fn get_importers(&self, target_path: &str) -> Result<Vec<String>> {
890        let mut stmt = self.conn.prepare_cached(
891            "SELECT f.relative_path FROM imports i
892             JOIN files f ON i.source_file_id = f.id
893             WHERE i.target_path = ?1
894             ORDER BY f.relative_path",
895        )?;
896        let rows = stmt.query_map(params![target_path], |row| row.get(0))?;
897        let mut results = Vec::new();
898        for row in rows {
899            results.push(row?);
900        }
901        Ok(results)
902    }
903
904    /// Get files that the given file imports (forward dependencies).
905    pub fn get_imports_of(&self, relative_path: &str) -> Result<Vec<String>> {
906        let mut stmt = self.conn.prepare_cached(
907            "SELECT i.target_path FROM imports i
908             JOIN files f ON i.source_file_id = f.id
909             WHERE f.relative_path = ?1
910             ORDER BY i.target_path",
911        )?;
912        let rows = stmt.query_map(params![relative_path], |row| row.get(0))?;
913        let mut results = Vec::new();
914        for row in rows {
915            results.push(row?);
916        }
917        Ok(results)
918    }
919
920    /// Build the full import graph from the database.
921    #[allow(clippy::type_complexity)]
922    pub fn build_import_graph(
923        &self,
924    ) -> Result<std::collections::HashMap<String, (Vec<String>, Vec<String>)>> {
925        let mut graph = std::collections::HashMap::new();
926
927        for path in self.all_file_paths()? {
928            graph.insert(path, (Vec::new(), Vec::new()));
929        }
930
931        let mut stmt = self.conn.prepare_cached(
932            "SELECT f.relative_path, i.target_path FROM imports i
933             JOIN files f ON i.source_file_id = f.id",
934        )?;
935        let rows = stmt.query_map([], |row| {
936            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
937        })?;
938        for row in rows {
939            let (source, target) = row?;
940            if let Some(entry) = graph.get_mut(&source) {
941                entry.0.push(target.clone());
942            }
943            if let Some(entry) = graph.get_mut(&target) {
944                entry.1.push(source.clone());
945            }
946        }
947
948        Ok(graph)
949    }
950
951    // ---- Call graph operations ----
952
953    /// Bulk insert call edges for a file (clears old edges first).
954    pub fn insert_calls(&self, file_id: i64, calls: &[NewCall]) -> Result<()> {
955        insert_calls(&self.conn, file_id, calls)
956    }
957
958    /// Find all callers of a function name (from DB cache).
959    pub fn get_callers_cached(
960        &self,
961        callee_name: &str,
962        max_results: usize,
963    ) -> Result<Vec<(String, String, i64)>> {
964        // #353: stored call-edge names are NFC — normalize the query side.
965        let callee_name = crate::unicode::nfc_identifier(callee_name);
966        let callee_name = callee_name.as_ref();
967        let mut stmt = self.conn.prepare_cached(
968            "SELECT f.relative_path, c.caller_name, c.line FROM calls c
969             JOIN files f ON c.caller_file_id = f.id
970             WHERE c.callee_name = ?1
971             ORDER BY f.relative_path, c.line
972             LIMIT ?2",
973        )?;
974        let mut rows = stmt.query(params![callee_name, max_results as i64])?;
975        let mut results = Vec::new();
976        while let Some(row) = rows.next()? {
977            results.push((row.get(0)?, row.get(1)?, row.get(2)?));
978        }
979        Ok(results)
980    }
981
982    /// Find all callees of a function name (from DB cache).
983    pub fn get_callees_cached(
984        &self,
985        caller_name: &str,
986        file_path: Option<&str>,
987        max_results: usize,
988    ) -> Result<Vec<(String, i64)>> {
989        // #353: stored call-edge names are NFC — normalize the query side.
990        let caller_name = crate::unicode::nfc_identifier(caller_name);
991        let caller_name = caller_name.as_ref();
992        let (sql, use_file) = match file_path {
993            Some(_) => (
994                "SELECT c.callee_name, c.line FROM calls c
995                 JOIN files f ON c.caller_file_id = f.id
996                 WHERE c.caller_name = ?1 AND f.relative_path = ?2
997                 ORDER BY c.line LIMIT ?3",
998                true,
999            ),
1000            None => (
1001                "SELECT c.callee_name, c.line FROM calls c
1002                 WHERE c.caller_name = ?1
1003                 ORDER BY c.line LIMIT ?2",
1004                false,
1005            ),
1006        };
1007        let mut stmt = self.conn.prepare_cached(sql)?;
1008        let mut rows = if use_file {
1009            stmt.query(params![
1010                caller_name,
1011                file_path.unwrap_or(""),
1012                max_results as i64
1013            ])?
1014        } else {
1015            stmt.query(params![caller_name, max_results as i64])?
1016        };
1017        let mut results = Vec::new();
1018        while let Some(row) = rows.next()? {
1019            results.push((row.get(0)?, row.get(1)?));
1020        }
1021        Ok(results)
1022    }
1023
1024    /// Check if calls table has any data.
1025    pub fn has_call_data(&self) -> Result<bool> {
1026        let count: i64 = self
1027            .conn
1028            .query_row("SELECT COUNT(*) FROM calls", [], |row| row.get(0))?;
1029        Ok(count > 0)
1030    }
1031
1032    // ---- Index failure tracking ----
1033
1034    /// Record an indexing failure for a file. Updates retry_count on conflict.
1035    pub fn record_index_failure(
1036        &self,
1037        file_path: &str,
1038        error_type: &str,
1039        error_message: &str,
1040    ) -> Result<()> {
1041        let now = std::time::SystemTime::now()
1042            .duration_since(std::time::UNIX_EPOCH)
1043            .unwrap_or_default()
1044            .as_secs() as i64;
1045        self.conn.execute(
1046            "INSERT INTO index_failures (file_path, error_type, error_message, failed_at, retry_count)
1047             VALUES (?1, ?2, ?3, ?4, 1)
1048             ON CONFLICT(file_path) DO UPDATE SET
1049                error_type = excluded.error_type,
1050                error_message = excluded.error_message,
1051                failed_at = excluded.failed_at,
1052                retry_count = retry_count + 1",
1053            params![file_path, error_type, error_message, now],
1054        )?;
1055        Ok(())
1056    }
1057
1058    /// Clear a failure record when a file is successfully indexed.
1059    pub fn clear_index_failure(&self, file_path: &str) -> Result<()> {
1060        self.conn.execute(
1061            "DELETE FROM index_failures WHERE file_path = ?1",
1062            params![file_path],
1063        )?;
1064        Ok(())
1065    }
1066
1067    /// Invalidate FTS index cache so next search triggers a lazy rebuild.
1068    pub fn invalidate_fts(&self) -> Result<()> {
1069        self.conn
1070            .execute("DELETE FROM meta WHERE key = 'fts_symbol_count'", [])?;
1071        Ok(())
1072    }
1073
1074    /// Get the number of files with indexing failures.
1075    pub fn index_failure_count(&self) -> Result<usize> {
1076        let count: i64 = self
1077            .conn
1078            .query_row("SELECT COUNT(*) FROM index_failures", [], |row| row.get(0))?;
1079        Ok(count as usize)
1080    }
1081
1082    /// Remove failure records for files that no longer exist on disk.
1083    pub fn prune_missing_index_failures(&self, project_root: &std::path::Path) -> Result<usize> {
1084        let mut stmt = self
1085            .conn
1086            .prepare_cached("SELECT file_path FROM index_failures ORDER BY file_path")?;
1087        let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
1088        let mut missing = Vec::new();
1089        for row in rows {
1090            let relative_path = row?;
1091            if !project_root.join(&relative_path).is_file() {
1092                missing.push(relative_path);
1093            }
1094        }
1095        for relative_path in &missing {
1096            self.clear_index_failure(relative_path)?;
1097        }
1098        Ok(missing.len())
1099    }
1100
1101    /// Summarize unresolved index failures by recency and persistence.
1102    pub fn index_failure_summary(
1103        &self,
1104        recent_window_secs: i64,
1105    ) -> Result<crate::db::IndexFailureSummary> {
1106        let now = std::time::SystemTime::now()
1107            .duration_since(std::time::UNIX_EPOCH)
1108            .unwrap_or_default()
1109            .as_secs() as i64;
1110        let recent_cutoff = now.saturating_sub(recent_window_secs.max(0));
1111
1112        let total_failures: i64 =
1113            self.conn
1114                .query_row("SELECT COUNT(*) FROM index_failures", [], |row| row.get(0))?;
1115        let recent_failures: i64 = self.conn.query_row(
1116            "SELECT COUNT(*) FROM index_failures WHERE failed_at >= ?1",
1117            params![recent_cutoff],
1118            |row| row.get(0),
1119        )?;
1120        let persistent_failures: i64 = self.conn.query_row(
1121            "SELECT COUNT(*) FROM index_failures WHERE retry_count >= 3",
1122            [],
1123            |row| row.get(0),
1124        )?;
1125
1126        Ok(crate::db::IndexFailureSummary {
1127            total_failures: total_failures as usize,
1128            recent_failures: recent_failures as usize,
1129            stale_failures: total_failures.saturating_sub(recent_failures) as usize,
1130            persistent_failures: persistent_failures as usize,
1131        })
1132    }
1133
1134    /// Get files that have failed more than `min_retries` times.
1135    pub fn get_persistent_failures(&self, min_retries: i64) -> Result<Vec<(String, String, i64)>> {
1136        let mut stmt = self.conn.prepare_cached(
1137            "SELECT file_path, error_message, retry_count FROM index_failures WHERE retry_count >= ?1 ORDER BY retry_count DESC",
1138        )?;
1139        let mut rows = stmt.query(params![min_retries])?;
1140        let mut results = Vec::new();
1141        while let Some(row) = rows.next()? {
1142            results.push((row.get(0)?, row.get(1)?, row.get(2)?));
1143        }
1144        Ok(results)
1145    }
1146}