Skip to main content

reposcry_cache/
db.rs

1use std::path::Path;
2
3use anyhow::{Context, Result};
4use rusqlite::{params, Connection};
5use serde::{Deserialize, Serialize};
6
7use reposcry_graph::edge::EdgeKind;
8use reposcry_graph::symbol::{CallSite, Import, Symbol};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct CachedFile {
12    pub id: i64,
13    pub path: String,
14    pub language: String,
15    pub hash: String,
16    pub size_bytes: i64,
17    pub loc: i64,
18    pub last_indexed_at: String,
19}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct CachedImport {
23    pub id: i64,
24    pub file_id: i64,
25    pub source: String,
26    pub target: String,
27    pub is_relative: bool,
28    pub imported_names: Vec<String>,
29    pub line: u32,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct CachedEdge {
34    pub id: i64,
35    pub source_file_id: i64,
36    pub target_file_id: Option<i64>,
37    pub target_path: Option<String>,
38    pub kind: String,
39    pub confidence: f64,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct CachedCallSite {
44    pub id: i64,
45    pub file_id: i64,
46    pub caller: String,
47    pub callee: String,
48    pub line: u32,
49    pub confidence: f64,
50    pub resolution_strategy: Option<String>,
51}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct CachedSymbolEdge {
55    pub id: i64,
56    pub source_symbol_id: i64,
57    pub target_symbol_id: i64,
58    pub source_file_id: i64,
59    pub target_file_id: i64,
60    pub kind: String,
61    pub line: u32,
62    pub confidence: f64,
63    pub resolution_strategy: Option<String>,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct CachedSearchHit {
68    pub node_id: i64,
69    pub file_path: String,
70    pub kind: String,
71    pub name: String,
72    pub signature: Option<String>,
73    pub score: f64,
74    pub match_reason: String,
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct CachedSearchVector {
79    pub node_id: i64,
80    pub file_path: String,
81    pub kind: String,
82    pub name: String,
83    pub signature: Option<String>,
84    pub backend: String,
85    pub dims: u32,
86    pub vector: Vec<f32>,
87}
88
89pub struct CacheDb {
90    conn: Connection,
91}
92
93impl CacheDb {
94    pub fn open(path: &Path) -> Result<Self> {
95        if let Some(parent) = path.parent() {
96            std::fs::create_dir_all(parent)?;
97        }
98        let conn = Connection::open(path).context("Failed to open cache database")?;
99        let db = Self { conn };
100        db.initialize()?;
101        Ok(db)
102    }
103
104    pub fn open_in_memory() -> Result<Self> {
105        let conn = Connection::open_in_memory()?;
106        let db = Self { conn };
107        db.initialize()?;
108        Ok(db)
109    }
110
111    fn initialize(&self) -> Result<()> {
112        self.conn.execute_batch(
113            "
114            PRAGMA foreign_keys = ON;
115            PRAGMA journal_mode = WAL;
116            PRAGMA synchronous = NORMAL;
117            PRAGMA cache_size = -64000;
118            PRAGMA temp_store = MEMORY;
119            PRAGMA busy_timeout = 5000;
120            CREATE TABLE IF NOT EXISTS files (
121                id INTEGER PRIMARY KEY,
122                path TEXT UNIQUE NOT NULL,
123                language TEXT NOT NULL DEFAULT '',
124                hash TEXT NOT NULL,
125                size_bytes INTEGER NOT NULL DEFAULT 0,
126                loc INTEGER NOT NULL DEFAULT 0,
127                last_indexed_at TEXT NOT NULL DEFAULT (datetime('now'))
128            );
129            CREATE TABLE IF NOT EXISTS symbols (
130                id INTEGER PRIMARY KEY,
131                file_id INTEGER NOT NULL,
132                name TEXT NOT NULL,
133                kind TEXT NOT NULL,
134                start_line INTEGER NOT NULL DEFAULT 0,
135                end_line INTEGER NOT NULL DEFAULT 0,
136                signature TEXT,
137                visibility TEXT,
138                doc_comment TEXT,
139                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
140            );
141            CREATE TABLE IF NOT EXISTS imports (
142                id INTEGER PRIMARY KEY,
143                file_id INTEGER NOT NULL,
144                source TEXT NOT NULL,
145                target TEXT NOT NULL,
146                is_relative INTEGER NOT NULL DEFAULT 0,
147                imported_names TEXT NOT NULL DEFAULT '[]',
148                line INTEGER NOT NULL DEFAULT 0,
149                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
150            );
151            CREATE TABLE IF NOT EXISTS edges (
152                id INTEGER PRIMARY KEY,
153                source_file_id INTEGER NOT NULL,
154                target_file_id INTEGER,
155                target_path TEXT,
156                kind TEXT NOT NULL,
157                confidence REAL NOT NULL DEFAULT 1.0,
158                FOREIGN KEY (source_file_id) REFERENCES files(id) ON DELETE CASCADE,
159                FOREIGN KEY (target_file_id) REFERENCES files(id) ON DELETE CASCADE
160            );
161            CREATE TABLE IF NOT EXISTS call_sites (
162                id INTEGER PRIMARY KEY,
163                file_id INTEGER NOT NULL,
164                caller TEXT NOT NULL,
165                callee TEXT NOT NULL,
166                line INTEGER NOT NULL DEFAULT 0,
167                confidence REAL NOT NULL DEFAULT 1.0,
168                resolution_strategy TEXT,
169                FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
170            );
171            CREATE TABLE IF NOT EXISTS symbol_edges (
172                id INTEGER PRIMARY KEY,
173                source_symbol_id INTEGER NOT NULL,
174                target_symbol_id INTEGER NOT NULL,
175                source_file_id INTEGER NOT NULL,
176                target_file_id INTEGER NOT NULL,
177                kind TEXT NOT NULL,
178                line INTEGER NOT NULL DEFAULT 0,
179                confidence REAL NOT NULL DEFAULT 1.0,
180                resolution_strategy TEXT,
181                FOREIGN KEY (source_symbol_id) REFERENCES symbols(id) ON DELETE CASCADE,
182                FOREIGN KEY (target_symbol_id) REFERENCES symbols(id) ON DELETE CASCADE,
183                FOREIGN KEY (source_file_id) REFERENCES files(id) ON DELETE CASCADE,
184                FOREIGN KEY (target_file_id) REFERENCES files(id) ON DELETE CASCADE
185            );
186            CREATE VIRTUAL TABLE IF NOT EXISTS search_index USING fts5(
187                node_id UNINDEXED,
188                file_path,
189                kind,
190                name,
191                signature,
192                doc_comment,
193                imports,
194                content
195            );
196            CREATE TABLE IF NOT EXISTS search_vectors (
197                node_id INTEGER NOT NULL,
198                file_path TEXT NOT NULL,
199                kind TEXT NOT NULL,
200                name TEXT NOT NULL,
201                signature TEXT,
202                backend TEXT NOT NULL,
203                dims INTEGER NOT NULL,
204                vector BLOB NOT NULL,
205                PRIMARY KEY (node_id, backend)
206            );
207            CREATE TABLE IF NOT EXISTS git_changes (
208                id INTEGER PRIMARY KEY,
209                path TEXT NOT NULL,
210                status TEXT NOT NULL DEFAULT 'modified',
211                lines_added INTEGER NOT NULL DEFAULT 0,
212                lines_deleted INTEGER NOT NULL DEFAULT 0,
213                recorded_at TEXT NOT NULL DEFAULT (datetime('now'))
214            );
215            CREATE TABLE IF NOT EXISTS config (
216                key TEXT PRIMARY KEY,
217                value TEXT NOT NULL DEFAULT ''
218            );
219            ",
220        )?;
221        self.migrate_imports_table()?;
222        Ok(())
223    }
224
225    fn migrate_imports_table(&self) -> Result<()> {
226        let has_imported_names = {
227            let mut stmt = self.conn.prepare("PRAGMA table_info(imports)")?;
228            let columns = stmt.query_map([], |row| row.get::<_, String>(1))?;
229            let mut has_imported_names = false;
230            for col in columns {
231                if col? == "imported_names" {
232                    has_imported_names = true;
233                    break;
234                }
235            }
236            has_imported_names
237        };
238        if !has_imported_names {
239            self.conn.execute(
240                "ALTER TABLE imports ADD COLUMN imported_names TEXT NOT NULL DEFAULT '[]'",
241                [],
242            )?;
243        }
244        Ok(())
245    }
246
247    pub fn get_file_by_path(&self, path: &str) -> Result<Option<CachedFile>> {
248        let mut stmt = self.conn.prepare(
249            "SELECT id, path, language, hash, size_bytes, loc, last_indexed_at \
250             FROM files WHERE path = ?1",
251        )?;
252        let mut rows = stmt.query_map(params![path], |row| {
253            Ok(CachedFile {
254                id: row.get(0)?,
255                path: row.get(1)?,
256                language: row.get(2)?,
257                hash: row.get(3)?,
258                size_bytes: row.get(4)?,
259                loc: row.get(5)?,
260                last_indexed_at: row.get(6)?,
261            })
262        })?;
263        match rows.next() {
264            Some(Ok(file)) => Ok(Some(file)),
265            Some(Err(e)) => Err(e.into()),
266            None => Ok(None),
267        }
268    }
269
270    pub fn upsert_file(
271        &self,
272        path: &str,
273        language: &str,
274        hash: &str,
275        size_bytes: i64,
276        loc: i64,
277    ) -> Result<i64> {
278        self.conn.execute(
279            "INSERT INTO files (path, language, hash, size_bytes, loc, last_indexed_at) \
280             VALUES (?1, ?2, ?3, ?4, ?5, datetime('now')) \
281             ON CONFLICT(path) DO UPDATE SET \
282               language = excluded.language, \
283               hash = excluded.hash, \
284               size_bytes = excluded.size_bytes, \
285               loc = excluded.loc, \
286               last_indexed_at = datetime('now')",
287            params![path, language, hash, size_bytes, loc],
288        )?;
289        self.get_file_by_path(path)?
290            .map(|file| file.id)
291            .ok_or_else(|| anyhow::anyhow!("file not found after upsert: {}", path))
292    }
293
294    pub fn delete_file(&self, path: &str) -> Result<()> {
295        if let Some(file) = self.get_file_by_path(path)? {
296            self.conn
297                .execute("DELETE FROM files WHERE id = ?1", params![file.id])?;
298        }
299        Ok(())
300    }
301
302    pub fn insert_symbols(&self, file_id: i64, symbols: &[Symbol]) -> Result<()> {
303        let tx = self.conn.unchecked_transaction()?;
304        tx.execute("DELETE FROM symbols WHERE file_id = ?1", params![file_id])?;
305        for sym in symbols {
306            tx.execute(
307                "INSERT INTO symbols (file_id, name, kind, start_line, end_line, signature, visibility, doc_comment) \
308                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
309                params![
310                    file_id,
311                    sym.name,
312                    sym.kind,
313                    sym.start_line,
314                    sym.end_line,
315                    sym.signature,
316                    sym.visibility,
317                    sym.doc_comment,
318                ],
319            )?;
320        }
321        tx.commit()?;
322        Ok(())
323    }
324
325    pub fn insert_imports(&self, file_id: i64, imports: &[Import]) -> Result<()> {
326        let tx = self.conn.unchecked_transaction()?;
327        tx.execute("DELETE FROM imports WHERE file_id = ?1", params![file_id])?;
328        for import in imports {
329            let imported_names = serde_json::to_string(&import.imported_names)?;
330            tx.execute(
331                "INSERT INTO imports (file_id, source, target, is_relative, imported_names, line) \
332                 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
333                params![
334                    file_id,
335                    import.source,
336                    import.target,
337                    if import.is_relative { 1 } else { 0 },
338                    imported_names,
339                    import.line,
340                ],
341            )?;
342        }
343        tx.commit()?;
344        Ok(())
345    }
346
347    pub fn insert_call_sites(&self, file_id: i64, call_sites: &[CallSite]) -> Result<()> {
348        let tx = self.conn.unchecked_transaction()?;
349        tx.execute(
350            "DELETE FROM call_sites WHERE file_id = ?1",
351            params![file_id],
352        )?;
353        for call_site in call_sites {
354            tx.execute(
355                "INSERT INTO call_sites (file_id, caller, callee, line, confidence, resolution_strategy) \
356                 VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
357                params![
358                    file_id,
359                    call_site.caller,
360                    call_site.callee,
361                    call_site.line,
362                    call_site.confidence,
363                    call_site.resolution_strategy,
364                ],
365            )?;
366        }
367        tx.commit()?;
368        Ok(())
369    }
370
371    pub fn get_symbols_by_file(&self, file_id: i64) -> Result<Vec<Symbol>> {
372        let mut stmt = self.conn.prepare(
373            "SELECT s.id, s.name, s.kind, s.start_line, s.end_line, s.signature, s.visibility, s.doc_comment, f.path \
374             FROM symbols s JOIN files f ON s.file_id = f.id WHERE s.file_id = ?1 \
375             ORDER BY s.start_line ASC, s.name ASC",
376        )?;
377        let rows = stmt.query_map(params![file_id], |row| {
378            Ok(Symbol {
379                id: row.get(0)?,
380                file_path: row.get(8)?,
381                name: row.get(1)?,
382                kind: row.get(2)?,
383                start_line: row.get(3)?,
384                end_line: row.get(4)?,
385                signature: row.get(5)?,
386                visibility: row.get(6)?,
387                doc_comment: row.get(7)?,
388            })
389        })?;
390        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
391    }
392
393    pub fn get_imports_by_file(&self, file_id: i64) -> Result<Vec<CachedImport>> {
394        let mut stmt = self.conn.prepare(
395            "SELECT id, file_id, source, target, is_relative, imported_names, line \
396             FROM imports WHERE file_id = ?1 \
397             ORDER BY line ASC, target ASC",
398        )?;
399        let rows = stmt.query_map(params![file_id], |row| {
400            let imported_names_json: String = row.get(5)?;
401            let imported_names = serde_json::from_str(&imported_names_json).unwrap_or_default();
402            Ok(CachedImport {
403                id: row.get(0)?,
404                file_id: row.get(1)?,
405                source: row.get(2)?,
406                target: row.get(3)?,
407                is_relative: row.get::<_, i64>(4)? != 0,
408                imported_names,
409                line: row.get::<_, i64>(6)? as u32,
410            })
411        })?;
412        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
413    }
414
415    pub fn get_all_imports(&self) -> Result<Vec<CachedImport>> {
416        let mut stmt = self.conn.prepare(
417            "SELECT id, file_id, source, target, is_relative, imported_names, line \
418             FROM imports \
419             ORDER BY file_id ASC, line ASC, target ASC",
420        )?;
421        let rows = stmt.query_map([], |row| {
422            let imported_names_json: String = row.get(5)?;
423            let imported_names = serde_json::from_str(&imported_names_json).unwrap_or_default();
424            Ok(CachedImport {
425                id: row.get(0)?,
426                file_id: row.get(1)?,
427                source: row.get(2)?,
428                target: row.get(3)?,
429                is_relative: row.get::<_, i64>(4)? != 0,
430                imported_names,
431                line: row.get::<_, i64>(6)? as u32,
432            })
433        })?;
434        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
435    }
436
437    pub fn get_call_sites_by_file(&self, file_id: i64) -> Result<Vec<CachedCallSite>> {
438        let mut stmt = self.conn.prepare(
439            "SELECT id, file_id, caller, callee, line, confidence, resolution_strategy \
440             FROM call_sites WHERE file_id = ?1 \
441             ORDER BY line ASC, callee ASC",
442        )?;
443        let rows = stmt.query_map(params![file_id], |row| {
444            Ok(CachedCallSite {
445                id: row.get(0)?,
446                file_id: row.get(1)?,
447                caller: row.get(2)?,
448                callee: row.get(3)?,
449                line: row.get::<_, i64>(4)? as u32,
450                confidence: row.get(5)?,
451                resolution_strategy: row.get(6)?,
452            })
453        })?;
454        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
455    }
456
457    pub fn get_all_call_sites(&self) -> Result<Vec<CachedCallSite>> {
458        let mut stmt = self.conn.prepare(
459            "SELECT id, file_id, caller, callee, line, confidence, resolution_strategy \
460             FROM call_sites \
461             ORDER BY file_id ASC, line ASC, callee ASC",
462        )?;
463        let rows = stmt.query_map([], |row| {
464            Ok(CachedCallSite {
465                id: row.get(0)?,
466                file_id: row.get(1)?,
467                caller: row.get(2)?,
468                callee: row.get(3)?,
469                line: row.get::<_, i64>(4)? as u32,
470                confidence: row.get(5)?,
471                resolution_strategy: row.get(6)?,
472            })
473        })?;
474        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
475    }
476
477    pub fn clear_edges_by_kind(&self, kind: EdgeKind) -> Result<()> {
478        self.conn
479            .execute("DELETE FROM edges WHERE kind = ?1", params![kind.as_str()])?;
480        Ok(())
481    }
482
483    pub fn clear_symbol_edges_by_kind(&self, kind: &str) -> Result<()> {
484        self.conn
485            .execute("DELETE FROM symbol_edges WHERE kind = ?1", params![kind])?;
486        Ok(())
487    }
488
489    pub fn clear_search_index(&self) -> Result<()> {
490        self.conn.execute("DELETE FROM search_index", [])?;
491        Ok(())
492    }
493
494    pub fn clear_search_vectors(&self, backend: Option<&str>) -> Result<()> {
495        match backend {
496            Some(backend) => {
497                self.conn.execute(
498                    "DELETE FROM search_vectors WHERE backend = ?1",
499                    params![backend],
500                )?;
501            }
502            None => {
503                self.conn.execute("DELETE FROM search_vectors", [])?;
504            }
505        }
506        Ok(())
507    }
508
509    pub fn has_search_vector(&self, node_id: i64, backend: &str) -> Result<bool> {
510        let mut stmt = self.conn.prepare(
511            "SELECT 1 FROM search_vectors WHERE node_id = ?1 AND backend = ?2 LIMIT 1",
512        )?;
513        let mut rows = stmt.query_map(params![node_id, backend], |row| row.get::<_, i64>(0))?;
514        match rows.next() {
515            Some(Ok(_)) => Ok(true),
516            Some(Err(error)) => Err(error.into()),
517            None => Ok(false),
518        }
519    }
520
521    pub fn prune_search_vectors_to_index(&self, backend: &str) -> Result<()> {
522        self.conn.execute(
523            "DELETE FROM search_vectors \
524             WHERE backend = ?1 \
525             AND node_id NOT IN (SELECT CAST(node_id AS INTEGER) FROM search_index)",
526            params![backend],
527        )?;
528        Ok(())
529    }
530
531    pub fn insert_edge(
532        &self,
533        source_file_id: i64,
534        target_file_id: Option<i64>,
535        target_path: Option<&str>,
536        kind: EdgeKind,
537        confidence: f64,
538    ) -> Result<()> {
539        self.conn.execute(
540            "INSERT INTO edges (source_file_id, target_file_id, target_path, kind, confidence) \
541             VALUES (?1, ?2, ?3, ?4, ?5)",
542            params![
543                source_file_id,
544                target_file_id,
545                target_path,
546                kind.as_str(),
547                confidence,
548            ],
549        )?;
550        Ok(())
551    }
552
553    pub fn get_edges_by_kind(&self, kind: EdgeKind) -> Result<Vec<CachedEdge>> {
554        let mut stmt = self.conn.prepare(
555            "SELECT id, source_file_id, target_file_id, target_path, kind, confidence \
556             FROM edges WHERE kind = ?1 \
557             ORDER BY source_file_id ASC, target_file_id ASC, target_path ASC",
558        )?;
559        let rows = stmt.query_map(params![kind.as_str()], |row| {
560            Ok(CachedEdge {
561                id: row.get(0)?,
562                source_file_id: row.get(1)?,
563                target_file_id: row.get(2)?,
564                target_path: row.get(3)?,
565                kind: row.get(4)?,
566                confidence: row.get(5)?,
567            })
568        })?;
569        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
570    }
571
572    pub fn insert_symbol_edges(&self, edges: &[CachedSymbolEdge]) -> Result<()> {
573        if edges.is_empty() {
574            return Ok(());
575        }
576        let tx = self.conn.unchecked_transaction()?;
577        for edge in edges {
578            tx.execute(
579                "INSERT INTO symbol_edges (source_symbol_id, target_symbol_id, source_file_id, target_file_id, kind, line, confidence, resolution_strategy) \
580                 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
581                params![
582                    edge.source_symbol_id,
583                    edge.target_symbol_id,
584                    edge.source_file_id,
585                    edge.target_file_id,
586                    edge.kind,
587                    edge.line,
588                    edge.confidence,
589                    edge.resolution_strategy,
590                ],
591            )?;
592        }
593        tx.commit()?;
594        Ok(())
595    }
596
597    pub fn get_symbol_edges_by_kind(&self, kind: &str) -> Result<Vec<CachedSymbolEdge>> {
598        let mut stmt = self.conn.prepare(
599            "SELECT id, source_symbol_id, target_symbol_id, source_file_id, target_file_id, kind, line, confidence, resolution_strategy \
600             FROM symbol_edges WHERE kind = ?1 \
601             ORDER BY source_symbol_id ASC, target_symbol_id ASC, line ASC",
602        )?;
603        let rows = stmt.query_map(params![kind], |row| {
604            Ok(CachedSymbolEdge {
605                id: row.get(0)?,
606                source_symbol_id: row.get(1)?,
607                target_symbol_id: row.get(2)?,
608                source_file_id: row.get(3)?,
609                target_file_id: row.get(4)?,
610                kind: row.get(5)?,
611                line: row.get::<_, i64>(6)? as u32,
612                confidence: row.get(7)?,
613                resolution_strategy: row.get(8)?,
614            })
615        })?;
616        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
617    }
618
619    pub fn insert_search_document(
620        &self,
621        node_id: i64,
622        file_path: &str,
623        kind: &str,
624        name: &str,
625        signature: Option<&str>,
626        doc_comment: Option<&str>,
627        imports: &str,
628        content: &str,
629    ) -> Result<()> {
630        self.conn.execute(
631            "INSERT INTO search_index (node_id, file_path, kind, name, signature, doc_comment, imports, content) \
632             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
633            params![
634                node_id,
635                file_path,
636                kind,
637                name,
638                signature,
639                doc_comment,
640                imports,
641                content,
642            ],
643        )?;
644        Ok(())
645    }
646
647    pub fn insert_search_vector(
648        &self,
649        node_id: i64,
650        file_path: &str,
651        kind: &str,
652        name: &str,
653        signature: Option<&str>,
654        backend: &str,
655        vector: &[f32],
656    ) -> Result<()> {
657        let mut bytes = Vec::with_capacity(vector.len() * std::mem::size_of::<f32>());
658        for value in vector {
659            bytes.extend_from_slice(&value.to_le_bytes());
660        }
661        self.conn.execute(
662            "INSERT OR REPLACE INTO search_vectors \
663             (node_id, file_path, kind, name, signature, backend, dims, vector) \
664             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
665            params![
666                node_id,
667                file_path,
668                kind,
669                name,
670                signature,
671                backend,
672                i64::try_from(vector.len()).unwrap_or(0),
673                bytes,
674            ],
675        )?;
676        Ok(())
677    }
678
679    pub fn search_nodes_fts(
680        &self,
681        query: &str,
682        kind: Option<&str>,
683        limit: usize,
684    ) -> Result<Vec<CachedSearchHit>> {
685        let limit = i64::try_from(limit).unwrap_or(50);
686        let hits = if let Some(kind) = kind {
687            let mut stmt = self.conn.prepare(
688                "SELECT node_id, file_path, kind, name, signature, bm25(search_index) \
689                 FROM search_index \
690                 WHERE search_index MATCH ?1 AND kind = ?2 \
691                 ORDER BY bm25(search_index) \
692                 LIMIT ?3",
693            )?;
694            let rows = stmt.query_map(params![query, kind, limit], |row| {
695                let score: f64 = row.get(5)?;
696                Ok(CachedSearchHit {
697                    node_id: row.get(0)?,
698                    file_path: row.get(1)?,
699                    kind: row.get(2)?,
700                    name: row.get(3)?,
701                    signature: row.get(4)?,
702                    score: -score,
703                    match_reason: "fts5".to_string(),
704                })
705            })?;
706            rows.collect::<std::result::Result<Vec<_>, _>>()?
707        } else {
708            let mut stmt = self.conn.prepare(
709                "SELECT node_id, file_path, kind, name, signature, bm25(search_index) \
710                 FROM search_index \
711                 WHERE search_index MATCH ?1 \
712                 ORDER BY bm25(search_index) \
713                 LIMIT ?2",
714            )?;
715            let rows = stmt.query_map(params![query, limit], |row| {
716                let score: f64 = row.get(5)?;
717                Ok(CachedSearchHit {
718                    node_id: row.get(0)?,
719                    file_path: row.get(1)?,
720                    kind: row.get(2)?,
721                    name: row.get(3)?,
722                    signature: row.get(4)?,
723                    score: -score,
724                    match_reason: "fts5".to_string(),
725                })
726            })?;
727            rows.collect::<std::result::Result<Vec<_>, _>>()?
728        };
729        Ok(hits)
730    }
731
732    pub fn get_search_vectors(
733        &self,
734        backend: &str,
735        kind: Option<&str>,
736    ) -> Result<Vec<CachedSearchVector>> {
737        let query = match kind {
738            Some(_) => {
739                "SELECT node_id, file_path, kind, name, signature, backend, dims, vector \
740                 FROM search_vectors WHERE backend = ?1 AND kind = ?2"
741            }
742            None => {
743                "SELECT node_id, file_path, kind, name, signature, backend, dims, vector \
744                 FROM search_vectors WHERE backend = ?1"
745            }
746        };
747        let mut stmt = self.conn.prepare(query)?;
748        let map_row = |row: &rusqlite::Row<'_>| {
749            let blob: Vec<u8> = row.get(7)?;
750            let mut vector = Vec::with_capacity(blob.len() / 4);
751            for chunk in blob.chunks_exact(4) {
752                vector.push(f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]));
753            }
754            Ok(CachedSearchVector {
755                node_id: row.get(0)?,
756                file_path: row.get(1)?,
757                kind: row.get(2)?,
758                name: row.get(3)?,
759                signature: row.get(4)?,
760                backend: row.get(5)?,
761                dims: row.get::<_, i64>(6)? as u32,
762                vector,
763            })
764        };
765        let rows = match kind {
766            Some(kind) => stmt.query_map(params![backend, kind], map_row)?,
767            None => stmt.query_map(params![backend], map_row)?,
768        };
769        Ok(rows.collect::<std::result::Result<Vec<_>, _>>()?)
770    }
771
772    pub fn set_config(&self, key: &str, value: &str) -> Result<()> {
773        self.conn.execute(
774            "INSERT INTO config (key, value) VALUES (?1, ?2) \
775             ON CONFLICT(key) DO UPDATE SET value = excluded.value",
776            params![key, value],
777        )?;
778        Ok(())
779    }
780
781    pub fn get_config(&self, key: &str) -> Result<Option<String>> {
782        let mut stmt = self
783            .conn
784            .prepare("SELECT value FROM config WHERE key = ?1")?;
785        let mut rows = stmt.query_map(params![key], |row| row.get(0))?;
786        match rows.next() {
787            Some(Ok(val)) => Ok(Some(val)),
788            Some(Err(e)) => Err(e.into()),
789            None => Ok(None),
790        }
791    }
792
793    pub fn file_count(&self) -> Result<i64> {
794        let count: i64 = self
795            .conn
796            .query_row("SELECT COUNT(*) FROM files", [], |row| row.get(0))?;
797        Ok(count)
798    }
799
800    pub fn symbol_count(&self) -> Result<i64> {
801        let count: i64 = self
802            .conn
803            .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))?;
804        Ok(count)
805    }
806
807    pub fn import_count(&self) -> Result<i64> {
808        let count: i64 = self
809            .conn
810            .query_row("SELECT COUNT(*) FROM imports", [], |row| row.get(0))?;
811        Ok(count)
812    }
813
814    pub fn call_site_count(&self) -> Result<i64> {
815        let count: i64 = self
816            .conn
817            .query_row("SELECT COUNT(*) FROM call_sites", [], |row| row.get(0))?;
818        Ok(count)
819    }
820
821    pub fn symbol_edge_count(&self) -> Result<i64> {
822        let count: i64 = self
823            .conn
824            .query_row("SELECT COUNT(*) FROM symbol_edges", [], |row| row.get(0))?;
825        Ok(count)
826    }
827
828    pub fn edge_count(&self) -> Result<i64> {
829        let count: i64 = self
830            .conn
831            .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))?;
832        Ok(count)
833    }
834
835    pub fn get_all_files(&self) -> Result<Vec<CachedFile>> {
836        let mut stmt = self.conn.prepare(
837            "SELECT id, path, language, hash, size_bytes, loc, last_indexed_at \
838             FROM files \
839             ORDER BY path ASC",
840        )?;
841        let rows = stmt.query_map([], |row| {
842            Ok(CachedFile {
843                id: row.get(0)?,
844                path: row.get(1)?,
845                language: row.get(2)?,
846                hash: row.get(3)?,
847                size_bytes: row.get(4)?,
848                loc: row.get(5)?,
849                last_indexed_at: row.get(6)?,
850            })
851        })?;
852        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
853    }
854
855    pub fn language_stats(&self) -> Result<Vec<(String, i64)>> {
856        let mut stmt = self.conn.prepare(
857            "SELECT language, COUNT(*) as cnt \
858             FROM files \
859             WHERE language != '' \
860             GROUP BY language \
861             ORDER BY cnt DESC",
862        )?;
863        let rows = stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?;
864        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
865    }
866}