Skip to main content

gobby_code/index/
api.rs

1use postgres::GenericClient;
2use serde::{Deserialize, Serialize};
3
4pub use crate::index::indexer::{
5    IndexDegradation, IndexDurations, IndexOutcome, IndexRequest, UnsupportedFileType, index_files,
6    project_changed_since,
7};
8
9use crate::models::{
10    CallRelation, ContentChunk, ImportRelation, IndexedFile, IndexedProject, Symbol,
11};
12
/// Maximum number of symbols sent to the database in a single `INSERT`
/// statement by `upsert_symbols`; larger inputs are split into batches of
/// this size.
const SYMBOL_UPSERT_BATCH_SIZE: usize = 500;
14
/// Describes a code-fact write for a single file: the project and file it
/// targets plus one count per kind of fact.
///
/// NOTE(review): nothing in this file constructs or consumes this type, so
/// the exact meaning of the counts (facts to write vs. facts discovered) should
/// be confirmed against the caller.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteRequest {
    /// Identifier of the project the file belongs to.
    pub project_id: String,
    /// Path of the file the facts belong to.
    pub file_path: String,
    /// Symbol count for the file.
    pub symbols: usize,
    /// Import-relation count for the file.
    pub imports: usize,
    /// Call-relation count for the file.
    pub calls: usize,
    /// Content-chunk count for the file.
    pub chunks: usize,
}
24
/// Tally of what a code-fact write produced, together with two flags that
/// record whether graph and vector synchronisation are still outstanding.
///
/// The `Default` value is all-zero counts with both flags `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteSummary {
    /// Number of files written.
    pub files_written: usize,
    /// Number of symbols written.
    pub symbols_written: usize,
    /// Number of import relations written.
    pub imports_written: usize,
    /// Number of call relations written.
    pub calls_written: usize,
    /// Number of content chunks written.
    pub chunks_written: usize,
    /// `true` while the graph sync is still pending
    /// (presumably a downstream graph store; confirm against the sync code).
    pub graph_sync_pending: bool,
    /// `true` while the vector sync is still pending
    /// (presumably an embedding/vector store; confirm against the sync code).
    pub vectors_sync_pending: bool,
}
35
36impl CodeFactWriteSummary {
37    pub fn for_file(symbols: usize, imports: usize, calls: usize, chunks: usize) -> Self {
38        Self {
39            files_written: 1,
40            symbols_written: symbols,
41            imports_written: imports,
42            calls_written: calls,
43            chunks_written: chunks,
44            graph_sync_pending: true,
45            vectors_sync_pending: true,
46        }
47    }
48}
49
50pub fn delete_file_facts(
51    conn: &mut impl GenericClient,
52    project_id: &str,
53    file_path: &str,
54) -> anyhow::Result<()> {
55    conn.execute(
56        "DELETE FROM code_symbols WHERE project_id = $1 AND file_path = $2",
57        &[&project_id, &file_path],
58    )?;
59    conn.execute(
60        "DELETE FROM code_indexed_files WHERE project_id = $1 AND file_path = $2",
61        &[&project_id, &file_path],
62    )?;
63    conn.execute(
64        "DELETE FROM code_content_chunks WHERE project_id = $1 AND file_path = $2",
65        &[&project_id, &file_path],
66    )?;
67    conn.execute(
68        "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
69        &[&project_id, &file_path],
70    )?;
71    conn.execute(
72        "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
73        &[&project_id, &file_path],
74    )?;
75    Ok(())
76}
77
78pub fn file_facts_exist(
79    conn: &mut impl GenericClient,
80    project_id: &str,
81    file_path: &str,
82) -> anyhow::Result<bool> {
83    let row = conn.query_one(
84        "SELECT
85            EXISTS(SELECT 1 FROM code_indexed_files WHERE project_id = $1 AND file_path = $2)
86            OR EXISTS(SELECT 1 FROM code_symbols WHERE project_id = $1 AND file_path = $2)
87            OR EXISTS(SELECT 1 FROM code_content_chunks WHERE project_id = $1 AND file_path = $2)
88            OR EXISTS(SELECT 1 FROM code_imports WHERE project_id = $1 AND source_file = $2)
89            OR EXISTS(SELECT 1 FROM code_calls WHERE project_id = $1 AND file_path = $2)",
90        &[&project_id, &file_path],
91    )?;
92    Ok(row.try_get(0)?)
93}
94
95pub fn upsert_symbols(conn: &mut impl GenericClient, symbols: &[Symbol]) -> anyhow::Result<usize> {
96    for chunk in symbols.chunks(SYMBOL_UPSERT_BATCH_SIZE) {
97        let ids = chunk.iter().map(|sym| sym.id.clone()).collect::<Vec<_>>();
98        let project_ids = chunk
99            .iter()
100            .map(|sym| sym.project_id.clone())
101            .collect::<Vec<_>>();
102        let file_paths = chunk
103            .iter()
104            .map(|sym| sym.file_path.clone())
105            .collect::<Vec<_>>();
106        let names = chunk.iter().map(|sym| sym.name.clone()).collect::<Vec<_>>();
107        let qualified_names = chunk
108            .iter()
109            .map(|sym| sym.qualified_name.clone())
110            .collect::<Vec<_>>();
111        let kinds = chunk.iter().map(|sym| sym.kind.clone()).collect::<Vec<_>>();
112        let languages = chunk
113            .iter()
114            .map(|sym| sym.language.clone())
115            .collect::<Vec<_>>();
116        let byte_starts = chunk
117            .iter()
118            .map(|sym| to_i32(sym.byte_start))
119            .collect::<Vec<_>>();
120        let byte_ends = chunk
121            .iter()
122            .map(|sym| to_i32(sym.byte_end))
123            .collect::<Vec<_>>();
124        let line_starts = chunk
125            .iter()
126            .map(|sym| to_i32(sym.line_start))
127            .collect::<Vec<_>>();
128        let line_ends = chunk
129            .iter()
130            .map(|sym| to_i32(sym.line_end))
131            .collect::<Vec<_>>();
132        let signatures = chunk
133            .iter()
134            .map(|sym| sym.signature.clone())
135            .collect::<Vec<_>>();
136        let docstrings = chunk
137            .iter()
138            .map(|sym| sym.docstring.clone())
139            .collect::<Vec<_>>();
140        let parent_symbol_ids = chunk
141            .iter()
142            .map(|sym| sym.parent_symbol_id.clone())
143            .collect::<Vec<_>>();
144        let content_hashes = chunk
145            .iter()
146            .map(|sym| sym.content_hash.clone())
147            .collect::<Vec<_>>();
148        let summaries = chunk
149            .iter()
150            .map(|sym| sym.summary.clone())
151            .collect::<Vec<_>>();
152
153        conn.execute(
154            "INSERT INTO code_symbols (
155                id, project_id, file_path, name, qualified_name,
156                kind, language, byte_start, byte_end,
157                line_start, line_end, signature, docstring,
158                parent_symbol_id, content_hash, summary,
159                created_at, updated_at
160            )
161            SELECT
162                id, project_id, file_path, name, qualified_name,
163                kind, language, byte_start, byte_end,
164                line_start, line_end, signature, docstring,
165                parent_symbol_id, content_hash, summary,
166                NOW(), NOW()
167            FROM unnest(
168                $1::text[], $2::text[], $3::text[], $4::text[],
169                $5::text[], $6::text[], $7::text[], $8::int4[],
170                $9::int4[], $10::int4[], $11::int4[], $12::text[],
171                $13::text[], $14::text[], $15::text[], $16::text[]
172            ) AS t(
173                id, project_id, file_path, name, qualified_name,
174                kind, language, byte_start, byte_end,
175                line_start, line_end, signature, docstring,
176                parent_symbol_id, content_hash, summary
177            )
178            ON CONFLICT(id) DO UPDATE SET
179                name=excluded.name, qualified_name=excluded.qualified_name,
180                kind=excluded.kind, byte_start=excluded.byte_start,
181                byte_end=excluded.byte_end, line_start=excluded.line_start,
182                line_end=excluded.line_end, signature=excluded.signature,
183                docstring=excluded.docstring, parent_symbol_id=excluded.parent_symbol_id,
184                language=excluded.language, content_hash=excluded.content_hash,
185                summary=CASE WHEN excluded.content_hash != code_symbols.content_hash
186                             THEN NULL ELSE code_symbols.summary END,
187                updated_at=NOW()",
188            &[
189                &ids,
190                &project_ids,
191                &file_paths,
192                &names,
193                &qualified_names,
194                &kinds,
195                &languages,
196                &byte_starts,
197                &byte_ends,
198                &line_starts,
199                &line_ends,
200                &signatures,
201                &docstrings,
202                &parent_symbol_ids,
203                &content_hashes,
204                &summaries,
205            ],
206        )?;
207    }
208    Ok(symbols.len())
209}
210
211pub fn upsert_file(conn: &mut impl GenericClient, file: &IndexedFile) -> anyhow::Result<()> {
212    conn.execute(
213        "INSERT INTO code_indexed_files (
214            id, project_id, file_path, language, content_hash,
215            symbol_count, byte_size, graph_synced, vectors_synced,
216            graph_sync_attempted_at, indexed_at
217        ) VALUES ($1,$2,$3,$4,$5,$6,$7,false,false,NULL,NOW())
218        ON CONFLICT(id) DO UPDATE SET
219            content_hash=excluded.content_hash,
220            symbol_count=excluded.symbol_count,
221            byte_size=excluded.byte_size,
222            graph_synced=false,
223            vectors_synced=false,
224            graph_sync_attempted_at=NULL,
225            indexed_at=NOW()",
226        &[
227            &file.id,
228            &file.project_id,
229            &file.file_path,
230            &file.language,
231            &file.content_hash,
232            &to_i32(file.symbol_count),
233            &to_i32(file.byte_size),
234        ],
235    )?;
236    Ok(())
237}
238
239pub fn upsert_content_chunks(
240    conn: &mut impl GenericClient,
241    chunks: &[ContentChunk],
242) -> anyhow::Result<usize> {
243    for chunk in chunks {
244        conn.execute(
245            "INSERT INTO code_content_chunks (
246                id, project_id, file_path, chunk_index,
247                line_start, line_end, content, language, created_at
248            ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW())
249            ON CONFLICT(id) DO UPDATE SET
250                content=excluded.content,
251                line_start=excluded.line_start,
252                line_end=excluded.line_end",
253            &[
254                &chunk.id,
255                &chunk.project_id,
256                &chunk.file_path,
257                &to_i32(chunk.chunk_index),
258                &to_i32(chunk.line_start),
259                &to_i32(chunk.line_end),
260                &chunk.content,
261                &chunk.language,
262            ],
263        )?;
264    }
265    Ok(chunks.len())
266}
267
268pub fn upsert_project_stats(
269    conn: &mut impl GenericClient,
270    project: &IndexedProject,
271) -> anyhow::Result<()> {
272    conn.execute(
273        "INSERT INTO code_indexed_projects (
274            id, root_path, total_files, total_symbols,
275            last_indexed_at, index_duration_ms
276        ) VALUES ($1,$2,$3,$4,NOW(),$5)
277        ON CONFLICT(id) DO UPDATE SET
278            root_path=excluded.root_path,
279            total_files=excluded.total_files,
280            total_symbols=excluded.total_symbols,
281            last_indexed_at=excluded.last_indexed_at,
282            index_duration_ms=excluded.index_duration_ms,
283            updated_at=NOW()",
284        &[
285            &project.id,
286            &project.root_path,
287            &to_i32(project.total_files),
288            &to_i32(project.total_symbols),
289            &to_i32(project.index_duration_ms as usize),
290        ],
291    )?;
292    Ok(())
293}
294
295pub fn upsert_imports(
296    conn: &mut impl GenericClient,
297    project_id: &str,
298    file_path: &str,
299    imports: &[ImportRelation],
300) -> anyhow::Result<usize> {
301    conn.execute(
302        "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
303        &[&project_id, &file_path],
304    )?;
305    for imp in imports {
306        conn.execute(
307            "INSERT INTO code_imports (project_id, source_file, target_module)
308             VALUES ($1, $2, $3)
309             ON CONFLICT (project_id, source_file, target_module) DO NOTHING",
310            &[&project_id, &imp.file_path, &imp.module_name],
311        )?;
312    }
313    Ok(imports.len())
314}
315
316pub fn upsert_calls(
317    conn: &mut impl GenericClient,
318    project_id: &str,
319    file_path: &str,
320    calls: &[CallRelation],
321) -> anyhow::Result<usize> {
322    conn.execute(
323        "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
324        &[&project_id, &file_path],
325    )?;
326    for call in calls {
327        conn.execute(
328            "INSERT INTO code_calls
329             (project_id, caller_symbol_id, callee_symbol_id, callee_name, \
330              callee_target_kind, callee_external_module, file_path, line)
331             VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
332             ON CONFLICT (
333                project_id, caller_symbol_id, callee_symbol_id, callee_name,
334                callee_target_kind, callee_external_module, file_path, line
335             ) DO NOTHING",
336            &[
337                &project_id,
338                &call.caller_symbol_id,
339                &call.callee_symbol_id.as_deref().unwrap_or(""),
340                &call.callee_name,
341                &call.callee_target_kind.as_str(),
342                &call.callee_external_module.as_deref().unwrap_or(""),
343                &call.file_path,
344                &to_i32(call.line),
345            ],
346        )?;
347    }
348    Ok(calls.len())
349}
350
/// Converts a `usize` to `i32`, saturating at `i32::MAX` when the value does
/// not fit.
fn to_i32(value: usize) -> i32 {
    i32::try_from(value).unwrap_or(i32::MAX)
}