// gobby_code/index/api.rs

1use postgres::GenericClient;
2use serde::{Deserialize, Serialize};
3
4pub use crate::index::indexer::{
5    IndexDegradation, IndexDurations, IndexOutcome, IndexRequest, UnsupportedFileType, index_files,
6    project_changed_since,
7};
8
9use crate::models::{
10    CallRelation, ContentChunk, ImportRelation, IndexedFile, IndexedProject, Symbol,
11};
12
/// Maximum number of symbols bound to a single `INSERT` statement by
/// `upsert_symbols`, which splits its input into chunks of this size.
const SYMBOL_UPSERT_BATCH_SIZE: usize = 500;
14
/// Serializable record pairing a project id and file path with four
/// per-category counters (symbols, imports, calls, chunks).
///
/// NOTE(review): nothing in the visible part of this module constructs or
/// consumes this type, so whether the counters mean "requested" or "already
/// written" should be confirmed against the callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteRequest {
    /// Identifier of the project the file belongs to.
    pub project_id: String,
    /// Path of the file the counters refer to.
    pub file_path: String,
    /// Symbol counter for the file.
    pub symbols: usize,
    /// Import counter for the file.
    pub imports: usize,
    /// Call counter for the file.
    pub calls: usize,
    /// Content-chunk counter for the file.
    pub chunks: usize,
}
24
25#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
26pub struct CodeFactWriteSummary {
27    pub files_written: usize,
28    pub symbols_written: usize,
29    pub imports_written: usize,
30    pub calls_written: usize,
31    pub chunks_written: usize,
32    pub graph_sync_pending: bool,
33    pub vectors_sync_pending: bool,
34}
35
36impl CodeFactWriteSummary {
37    pub fn for_file(symbols: usize, imports: usize, calls: usize, chunks: usize) -> Self {
38        Self {
39            files_written: 1,
40            symbols_written: symbols,
41            imports_written: imports,
42            calls_written: calls,
43            chunks_written: chunks,
44            graph_sync_pending: true,
45            vectors_sync_pending: true,
46        }
47    }
48}
49
50pub fn delete_file_facts(
51    conn: &mut impl GenericClient,
52    project_id: &str,
53    file_path: &str,
54) -> anyhow::Result<()> {
55    conn.execute(
56        "DELETE FROM code_symbols WHERE project_id = $1 AND file_path = $2",
57        &[&project_id, &file_path],
58    )?;
59    delete_file_non_symbol_facts(conn, project_id, file_path)
60}
61
62pub fn delete_file_non_symbol_facts(
63    conn: &mut impl GenericClient,
64    project_id: &str,
65    file_path: &str,
66) -> anyhow::Result<()> {
67    conn.execute(
68        "DELETE FROM code_indexed_files WHERE project_id = $1 AND file_path = $2",
69        &[&project_id, &file_path],
70    )?;
71    conn.execute(
72        "DELETE FROM code_content_chunks WHERE project_id = $1 AND file_path = $2",
73        &[&project_id, &file_path],
74    )?;
75    conn.execute(
76        "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
77        &[&project_id, &file_path],
78    )?;
79    conn.execute(
80        "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
81        &[&project_id, &file_path],
82    )?;
83    Ok(())
84}
85
86pub fn delete_stale_file_symbols(
87    conn: &mut impl GenericClient,
88    project_id: &str,
89    file_path: &str,
90    current_symbol_ids: &[String],
91) -> anyhow::Result<usize> {
92    let deleted = if current_symbol_ids.is_empty() {
93        conn.execute(
94            "DELETE FROM code_symbols WHERE project_id = $1 AND file_path = $2",
95            &[&project_id, &file_path],
96        )?
97    } else {
98        let current_symbol_ids = current_symbol_ids.to_vec();
99        conn.execute(
100            "DELETE FROM code_symbols
101             WHERE project_id = $1
102               AND file_path = $2
103               AND NOT (id = ANY($3::text[]))",
104            &[&project_id, &file_path, &current_symbol_ids],
105        )?
106    };
107    usize::try_from(deleted).map_err(|_| anyhow::anyhow!("deleted symbol count exceeds usize"))
108}
109
110pub fn file_facts_exist(
111    conn: &mut impl GenericClient,
112    project_id: &str,
113    file_path: &str,
114) -> anyhow::Result<bool> {
115    let row = conn.query_one(
116        "SELECT
117            EXISTS(SELECT 1 FROM code_indexed_files WHERE project_id = $1 AND file_path = $2)
118            OR EXISTS(SELECT 1 FROM code_symbols WHERE project_id = $1 AND file_path = $2)
119            OR EXISTS(SELECT 1 FROM code_content_chunks WHERE project_id = $1 AND file_path = $2)
120            OR EXISTS(SELECT 1 FROM code_imports WHERE project_id = $1 AND source_file = $2)
121            OR EXISTS(SELECT 1 FROM code_calls WHERE project_id = $1 AND file_path = $2)",
122        &[&project_id, &file_path],
123    )?;
124    Ok(row.try_get(0)?)
125}
126
127pub fn upsert_symbols(conn: &mut impl GenericClient, symbols: &[Symbol]) -> anyhow::Result<usize> {
128    for chunk in symbols.chunks(SYMBOL_UPSERT_BATCH_SIZE) {
129        let ids = chunk.iter().map(|sym| sym.id.clone()).collect::<Vec<_>>();
130        let project_ids = chunk
131            .iter()
132            .map(|sym| sym.project_id.clone())
133            .collect::<Vec<_>>();
134        let file_paths = chunk
135            .iter()
136            .map(|sym| sym.file_path.clone())
137            .collect::<Vec<_>>();
138        let names = chunk.iter().map(|sym| sym.name.clone()).collect::<Vec<_>>();
139        let qualified_names = chunk
140            .iter()
141            .map(|sym| sym.qualified_name.clone())
142            .collect::<Vec<_>>();
143        let kinds = chunk.iter().map(|sym| sym.kind.clone()).collect::<Vec<_>>();
144        let languages = chunk
145            .iter()
146            .map(|sym| sym.language.clone())
147            .collect::<Vec<_>>();
148        let byte_starts = chunk
149            .iter()
150            .map(|sym| to_i32(sym.byte_start))
151            .collect::<Vec<_>>();
152        let byte_ends = chunk
153            .iter()
154            .map(|sym| to_i32(sym.byte_end))
155            .collect::<Vec<_>>();
156        let line_starts = chunk
157            .iter()
158            .map(|sym| to_i32(sym.line_start))
159            .collect::<Vec<_>>();
160        let line_ends = chunk
161            .iter()
162            .map(|sym| to_i32(sym.line_end))
163            .collect::<Vec<_>>();
164        let signatures = chunk
165            .iter()
166            .map(|sym| sym.signature.clone())
167            .collect::<Vec<_>>();
168        let docstrings = chunk
169            .iter()
170            .map(|sym| sym.docstring.clone())
171            .collect::<Vec<_>>();
172        let parent_symbol_ids = chunk
173            .iter()
174            .map(|sym| sym.parent_symbol_id.clone())
175            .collect::<Vec<_>>();
176        let content_hashes = chunk
177            .iter()
178            .map(|sym| sym.content_hash.clone())
179            .collect::<Vec<_>>();
180        let summaries = chunk
181            .iter()
182            .map(|sym| sym.summary.clone())
183            .collect::<Vec<_>>();
184
185        conn.execute(
186            "INSERT INTO code_symbols (
187                id, project_id, file_path, name, qualified_name,
188                kind, language, byte_start, byte_end,
189                line_start, line_end, signature, docstring,
190                parent_symbol_id, content_hash, summary,
191                created_at, updated_at
192            )
193            SELECT
194                id, project_id, file_path, name, qualified_name,
195                kind, language, byte_start, byte_end,
196                line_start, line_end, signature, docstring,
197                parent_symbol_id, content_hash, summary,
198                NOW(), NOW()
199            FROM unnest(
200                $1::text[], $2::text[], $3::text[], $4::text[],
201                $5::text[], $6::text[], $7::text[], $8::int4[],
202                $9::int4[], $10::int4[], $11::int4[], $12::text[],
203                $13::text[], $14::text[], $15::text[], $16::text[]
204            ) AS t(
205                id, project_id, file_path, name, qualified_name,
206                kind, language, byte_start, byte_end,
207                line_start, line_end, signature, docstring,
208                parent_symbol_id, content_hash, summary
209            )
210            ON CONFLICT(id) DO UPDATE SET
211                name=excluded.name, qualified_name=excluded.qualified_name,
212                kind=excluded.kind, byte_start=excluded.byte_start,
213                byte_end=excluded.byte_end, line_start=excluded.line_start,
214                line_end=excluded.line_end, signature=excluded.signature,
215                docstring=excluded.docstring, parent_symbol_id=excluded.parent_symbol_id,
216                language=excluded.language, content_hash=excluded.content_hash,
217                summary=CASE WHEN excluded.content_hash != code_symbols.content_hash
218                             THEN NULL ELSE code_symbols.summary END,
219                updated_at=NOW()",
220            &[
221                &ids,
222                &project_ids,
223                &file_paths,
224                &names,
225                &qualified_names,
226                &kinds,
227                &languages,
228                &byte_starts,
229                &byte_ends,
230                &line_starts,
231                &line_ends,
232                &signatures,
233                &docstrings,
234                &parent_symbol_ids,
235                &content_hashes,
236                &summaries,
237            ],
238        )?;
239    }
240    Ok(symbols.len())
241}
242
243pub fn upsert_file(conn: &mut impl GenericClient, file: &IndexedFile) -> anyhow::Result<()> {
244    conn.execute(
245        "INSERT INTO code_indexed_files (
246            id, project_id, file_path, language, content_hash,
247            symbol_count, byte_size, graph_synced, vectors_synced,
248            graph_sync_attempted_at, indexed_at
249        ) VALUES ($1,$2,$3,$4,$5,$6,$7,false,false,NULL,NOW())
250        ON CONFLICT(id) DO UPDATE SET
251            content_hash=excluded.content_hash,
252            symbol_count=excluded.symbol_count,
253            byte_size=excluded.byte_size,
254            graph_synced=false,
255            vectors_synced=false,
256            graph_sync_attempted_at=NULL,
257            indexed_at=NOW()",
258        &[
259            &file.id,
260            &file.project_id,
261            &file.file_path,
262            &file.language,
263            &file.content_hash,
264            &to_i32(file.symbol_count),
265            &to_i32(file.byte_size),
266        ],
267    )?;
268    Ok(())
269}
270
271pub fn upsert_project_seed(
272    conn: &mut impl GenericClient,
273    project_id: &str,
274    root_path: &std::path::Path,
275) -> anyhow::Result<()> {
276    let root_path = root_path.to_string_lossy().to_string();
277    conn.execute(
278        "INSERT INTO code_indexed_projects (
279            id, root_path, total_files, total_symbols,
280            last_indexed_at, index_duration_ms
281        ) VALUES ($1,$2,0,0,NULL,0)
282        ON CONFLICT(id) DO UPDATE SET
283            root_path=excluded.root_path,
284            updated_at=NOW()",
285        &[&project_id, &root_path],
286    )?;
287    Ok(())
288}
289
290pub fn upsert_content_chunks(
291    conn: &mut impl GenericClient,
292    chunks: &[ContentChunk],
293) -> anyhow::Result<usize> {
294    for chunk in chunks {
295        conn.execute(
296            "INSERT INTO code_content_chunks (
297                id, project_id, file_path, chunk_index,
298                line_start, line_end, content, language, created_at
299            ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW())
300            ON CONFLICT(id) DO UPDATE SET
301                content=excluded.content,
302                line_start=excluded.line_start,
303                line_end=excluded.line_end",
304            &[
305                &chunk.id,
306                &chunk.project_id,
307                &chunk.file_path,
308                &to_i32(chunk.chunk_index),
309                &to_i32(chunk.line_start),
310                &to_i32(chunk.line_end),
311                &chunk.content,
312                &chunk.language,
313            ],
314        )?;
315    }
316    Ok(chunks.len())
317}
318
319pub fn upsert_project_stats(
320    conn: &mut impl GenericClient,
321    project: &IndexedProject,
322) -> anyhow::Result<()> {
323    conn.execute(
324        "INSERT INTO code_indexed_projects (
325            id, root_path, total_files, total_symbols,
326            last_indexed_at, index_duration_ms
327        ) VALUES ($1,$2,$3,$4,NOW(),$5)
328        ON CONFLICT(id) DO UPDATE SET
329            root_path=excluded.root_path,
330            total_files=excluded.total_files,
331            total_symbols=excluded.total_symbols,
332            last_indexed_at=excluded.last_indexed_at,
333            index_duration_ms=excluded.index_duration_ms,
334            updated_at=NOW()",
335        &[
336            &project.id,
337            &project.root_path,
338            &to_i32(project.total_files),
339            &to_i32(project.total_symbols),
340            &to_i32(project.index_duration_ms as usize),
341        ],
342    )?;
343    Ok(())
344}
345
346pub fn upsert_imports(
347    conn: &mut impl GenericClient,
348    project_id: &str,
349    file_path: &str,
350    imports: &[ImportRelation],
351) -> anyhow::Result<usize> {
352    conn.execute(
353        "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
354        &[&project_id, &file_path],
355    )?;
356    let mut rows_affected = 0usize;
357    for imp in imports {
358        rows_affected += conn.execute(
359            "INSERT INTO code_imports (project_id, source_file, target_module)
360             VALUES ($1, $2, $3)
361             ON CONFLICT (project_id, source_file, target_module) DO NOTHING",
362            &[&project_id, &imp.file_path, &imp.module_name],
363        )? as usize;
364    }
365    Ok(rows_affected)
366}
367
368pub fn upsert_calls(
369    conn: &mut impl GenericClient,
370    project_id: &str,
371    file_path: &str,
372    calls: &[CallRelation],
373) -> anyhow::Result<usize> {
374    conn.execute(
375        "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
376        &[&project_id, &file_path],
377    )?;
378    let mut rows_affected = 0usize;
379    for call in calls {
380        rows_affected += insert_call(conn, project_id, call)?;
381    }
382    Ok(rows_affected)
383}
384
385fn insert_call(
386    conn: &mut impl GenericClient,
387    project_id: &str,
388    call: &CallRelation,
389) -> anyhow::Result<usize> {
390    let rows = conn.execute(
391        "INSERT INTO code_calls
392         (project_id, caller_symbol_id, callee_symbol_id, callee_name, \
393          callee_target_kind, callee_external_module, file_path, line)
394         VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
395         ON CONFLICT (
396            project_id, caller_symbol_id, callee_symbol_id, callee_name,
397            callee_target_kind, callee_external_module, file_path, line
398         ) DO NOTHING",
399        &[
400            &project_id,
401            &call.caller_symbol_id,
402            &call.callee_symbol_id.as_deref().unwrap_or(""),
403            &call.callee_name,
404            &call.callee_target_kind.as_str(),
405            &call.callee_external_module.as_deref().unwrap_or(""),
406            &call.file_path,
407            &to_i32(call.line),
408        ],
409    )?;
410    Ok(rows as usize)
411}
412
413/// Replace a pending `local_import` call row with its resolved form. The exact
414/// pending row (identified by its persisted columns) is deleted and the
415/// `resolved` call — a `Symbol` target on a hit, `Unresolved` on a miss — is
416/// inserted in its place. Used by the post-write local-import resolution pass.
417pub fn promote_local_import_call(
418    conn: &mut impl GenericClient,
419    project_id: &str,
420    original: &CallRelation,
421    resolved: &CallRelation,
422) -> anyhow::Result<()> {
423    conn.execute(
424        "DELETE FROM code_calls
425         WHERE project_id = $1 AND caller_symbol_id = $2 AND callee_symbol_id = ''
426           AND callee_name = $3 AND callee_target_kind = 'local_import'
427           AND callee_external_module = $4 AND file_path = $5 AND line = $6",
428        &[
429            &project_id,
430            &original.caller_symbol_id,
431            &original.callee_name,
432            &original.callee_external_module.as_deref().unwrap_or(""),
433            &original.file_path,
434            &to_i32(original.line),
435        ],
436    )?;
437    insert_call(conn, project_id, resolved)?;
438    Ok(())
439}
440
/// Converts a `usize` to `i32`, saturating at `i32::MAX` instead of wrapping.
fn to_i32(value: usize) -> i32 {
    i32::try_from(value).unwrap_or(i32::MAX)
}