Skip to main content

gobby_code/index/
api.rs

1use postgres::GenericClient;
2use serde::{Deserialize, Serialize};
3
4pub use crate::index::indexer::{
5    IndexDegradation, IndexDurations, IndexOutcome, IndexRequest, index_files,
6};
7
8use crate::models::{
9    CallRelation, ContentChunk, ImportRelation, IndexedFile, IndexedProject, Symbol,
10};
11
/// Maximum number of symbols written by a single batched `INSERT` statement
/// in `upsert_symbols`; larger inputs are split into slices of this size.
const SYMBOL_UPSERT_BATCH_SIZE: usize = 500;
13
/// Serializable description of a per-file code-fact write: which file it
/// targets and a count for each fact category.
// NOTE(review): nothing in the visible part of this file constructs or reads
// this type; the field meanings below are inferred from the names — confirm
// against the callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteRequest {
    /// Project the file belongs to.
    pub project_id: String,
    /// Path of the file within the project.
    pub file_path: String,
    /// Symbol count for the file.
    pub symbols: usize,
    /// Import count for the file.
    pub imports: usize,
    /// Call count for the file.
    pub calls: usize,
    /// Content-chunk count for the file.
    pub chunks: usize,
}
23
/// Serializable tally of what a code-fact write produced.
///
/// The derived `Default` is the empty summary: every counter is `0` and both
/// pending flags are `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteSummary {
    /// Number of files written.
    pub files_written: usize,
    /// Number of symbols written.
    pub symbols_written: usize,
    /// Number of imports written.
    pub imports_written: usize,
    /// Number of calls written.
    pub calls_written: usize,
    /// Number of content chunks written.
    pub chunks_written: usize,
    /// Set to `true` by `for_file`.
    // NOTE(review): presumably mirrors `graph_synced = false` written by
    // `upsert_file` — confirm.
    pub graph_sync_pending: bool,
    /// Set to `true` by `for_file`.
    // NOTE(review): presumably mirrors `vectors_synced = false` written by
    // `upsert_file` — confirm.
    pub vectors_sync_pending: bool,
}
34
35impl CodeFactWriteSummary {
36    pub fn for_file(symbols: usize, imports: usize, calls: usize, chunks: usize) -> Self {
37        Self {
38            files_written: 1,
39            symbols_written: symbols,
40            imports_written: imports,
41            calls_written: calls,
42            chunks_written: chunks,
43            graph_sync_pending: true,
44            vectors_sync_pending: true,
45        }
46    }
47}
48
49pub fn delete_file_facts(
50    conn: &mut impl GenericClient,
51    project_id: &str,
52    file_path: &str,
53) -> anyhow::Result<()> {
54    conn.execute(
55        "DELETE FROM code_symbols WHERE project_id = $1 AND file_path = $2",
56        &[&project_id, &file_path],
57    )?;
58    conn.execute(
59        "DELETE FROM code_indexed_files WHERE project_id = $1 AND file_path = $2",
60        &[&project_id, &file_path],
61    )?;
62    conn.execute(
63        "DELETE FROM code_content_chunks WHERE project_id = $1 AND file_path = $2",
64        &[&project_id, &file_path],
65    )?;
66    conn.execute(
67        "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
68        &[&project_id, &file_path],
69    )?;
70    conn.execute(
71        "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
72        &[&project_id, &file_path],
73    )?;
74    Ok(())
75}
76
77pub fn upsert_symbols(conn: &mut impl GenericClient, symbols: &[Symbol]) -> anyhow::Result<usize> {
78    for chunk in symbols.chunks(SYMBOL_UPSERT_BATCH_SIZE) {
79        let ids = chunk.iter().map(|sym| sym.id.clone()).collect::<Vec<_>>();
80        let project_ids = chunk
81            .iter()
82            .map(|sym| sym.project_id.clone())
83            .collect::<Vec<_>>();
84        let file_paths = chunk
85            .iter()
86            .map(|sym| sym.file_path.clone())
87            .collect::<Vec<_>>();
88        let names = chunk.iter().map(|sym| sym.name.clone()).collect::<Vec<_>>();
89        let qualified_names = chunk
90            .iter()
91            .map(|sym| sym.qualified_name.clone())
92            .collect::<Vec<_>>();
93        let kinds = chunk.iter().map(|sym| sym.kind.clone()).collect::<Vec<_>>();
94        let languages = chunk
95            .iter()
96            .map(|sym| sym.language.clone())
97            .collect::<Vec<_>>();
98        let byte_starts = chunk
99            .iter()
100            .map(|sym| to_i32(sym.byte_start))
101            .collect::<Vec<_>>();
102        let byte_ends = chunk
103            .iter()
104            .map(|sym| to_i32(sym.byte_end))
105            .collect::<Vec<_>>();
106        let line_starts = chunk
107            .iter()
108            .map(|sym| to_i32(sym.line_start))
109            .collect::<Vec<_>>();
110        let line_ends = chunk
111            .iter()
112            .map(|sym| to_i32(sym.line_end))
113            .collect::<Vec<_>>();
114        let signatures = chunk
115            .iter()
116            .map(|sym| sym.signature.clone())
117            .collect::<Vec<_>>();
118        let docstrings = chunk
119            .iter()
120            .map(|sym| sym.docstring.clone())
121            .collect::<Vec<_>>();
122        let parent_symbol_ids = chunk
123            .iter()
124            .map(|sym| sym.parent_symbol_id.clone())
125            .collect::<Vec<_>>();
126        let content_hashes = chunk
127            .iter()
128            .map(|sym| sym.content_hash.clone())
129            .collect::<Vec<_>>();
130        let summaries = chunk
131            .iter()
132            .map(|sym| sym.summary.clone())
133            .collect::<Vec<_>>();
134
135        conn.execute(
136            "INSERT INTO code_symbols (
137                id, project_id, file_path, name, qualified_name,
138                kind, language, byte_start, byte_end,
139                line_start, line_end, signature, docstring,
140                parent_symbol_id, content_hash, summary,
141                created_at, updated_at
142            )
143            SELECT
144                id, project_id, file_path, name, qualified_name,
145                kind, language, byte_start, byte_end,
146                line_start, line_end, signature, docstring,
147                parent_symbol_id, content_hash, summary,
148                NOW(), NOW()
149            FROM unnest(
150                $1::text[], $2::text[], $3::text[], $4::text[],
151                $5::text[], $6::text[], $7::text[], $8::int4[],
152                $9::int4[], $10::int4[], $11::int4[], $12::text[],
153                $13::text[], $14::text[], $15::text[], $16::text[]
154            ) AS t(
155                id, project_id, file_path, name, qualified_name,
156                kind, language, byte_start, byte_end,
157                line_start, line_end, signature, docstring,
158                parent_symbol_id, content_hash, summary
159            )
160            ON CONFLICT(id) DO UPDATE SET
161                name=excluded.name, qualified_name=excluded.qualified_name,
162                kind=excluded.kind, byte_start=excluded.byte_start,
163                byte_end=excluded.byte_end, line_start=excluded.line_start,
164                line_end=excluded.line_end, signature=excluded.signature,
165                docstring=excluded.docstring, parent_symbol_id=excluded.parent_symbol_id,
166                language=excluded.language, content_hash=excluded.content_hash,
167                summary=CASE WHEN excluded.content_hash != code_symbols.content_hash
168                             THEN NULL ELSE code_symbols.summary END,
169                updated_at=NOW()",
170            &[
171                &ids,
172                &project_ids,
173                &file_paths,
174                &names,
175                &qualified_names,
176                &kinds,
177                &languages,
178                &byte_starts,
179                &byte_ends,
180                &line_starts,
181                &line_ends,
182                &signatures,
183                &docstrings,
184                &parent_symbol_ids,
185                &content_hashes,
186                &summaries,
187            ],
188        )?;
189    }
190    Ok(symbols.len())
191}
192
193pub fn upsert_file(conn: &mut impl GenericClient, file: &IndexedFile) -> anyhow::Result<()> {
194    conn.execute(
195        "INSERT INTO code_indexed_files (
196            id, project_id, file_path, language, content_hash,
197            symbol_count, byte_size, graph_synced, vectors_synced,
198            graph_sync_attempted_at, indexed_at
199        ) VALUES ($1,$2,$3,$4,$5,$6,$7,false,false,NULL,NOW())
200        ON CONFLICT(id) DO UPDATE SET
201            content_hash=excluded.content_hash,
202            symbol_count=excluded.symbol_count,
203            byte_size=excluded.byte_size,
204            graph_synced=false,
205            vectors_synced=false,
206            graph_sync_attempted_at=NULL,
207            indexed_at=NOW()",
208        &[
209            &file.id,
210            &file.project_id,
211            &file.file_path,
212            &file.language,
213            &file.content_hash,
214            &to_i32(file.symbol_count),
215            &to_i32(file.byte_size),
216        ],
217    )?;
218    Ok(())
219}
220
221pub fn upsert_content_chunks(
222    conn: &mut impl GenericClient,
223    chunks: &[ContentChunk],
224) -> anyhow::Result<usize> {
225    for chunk in chunks {
226        conn.execute(
227            "INSERT INTO code_content_chunks (
228                id, project_id, file_path, chunk_index,
229                line_start, line_end, content, language, created_at
230            ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW())
231            ON CONFLICT(id) DO UPDATE SET
232                content=excluded.content,
233                line_start=excluded.line_start,
234                line_end=excluded.line_end",
235            &[
236                &chunk.id,
237                &chunk.project_id,
238                &chunk.file_path,
239                &to_i32(chunk.chunk_index),
240                &to_i32(chunk.line_start),
241                &to_i32(chunk.line_end),
242                &chunk.content,
243                &chunk.language,
244            ],
245        )?;
246    }
247    Ok(chunks.len())
248}
249
250pub fn upsert_project_stats(
251    conn: &mut impl GenericClient,
252    project: &IndexedProject,
253) -> anyhow::Result<()> {
254    conn.execute(
255        "INSERT INTO code_indexed_projects (
256            id, root_path, total_files, total_symbols,
257            last_indexed_at, index_duration_ms
258        ) VALUES ($1,$2,$3,$4,NOW(),$5)
259        ON CONFLICT(id) DO UPDATE SET
260            root_path=excluded.root_path,
261            total_files=excluded.total_files,
262            total_symbols=excluded.total_symbols,
263            last_indexed_at=excluded.last_indexed_at,
264            index_duration_ms=excluded.index_duration_ms,
265            updated_at=NOW()",
266        &[
267            &project.id,
268            &project.root_path,
269            &to_i32(project.total_files),
270            &to_i32(project.total_symbols),
271            &to_i32(project.index_duration_ms as usize),
272        ],
273    )?;
274    Ok(())
275}
276
277pub fn upsert_imports(
278    conn: &mut impl GenericClient,
279    project_id: &str,
280    file_path: &str,
281    imports: &[ImportRelation],
282) -> anyhow::Result<usize> {
283    conn.execute(
284        "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
285        &[&project_id, &file_path],
286    )?;
287    for imp in imports {
288        conn.execute(
289            "INSERT INTO code_imports (project_id, source_file, target_module)
290             VALUES ($1, $2, $3)
291             ON CONFLICT (project_id, source_file, target_module) DO NOTHING",
292            &[&project_id, &imp.file_path, &imp.module_name],
293        )?;
294    }
295    Ok(imports.len())
296}
297
298pub fn upsert_calls(
299    conn: &mut impl GenericClient,
300    project_id: &str,
301    file_path: &str,
302    calls: &[CallRelation],
303) -> anyhow::Result<usize> {
304    conn.execute(
305        "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
306        &[&project_id, &file_path],
307    )?;
308    for call in calls {
309        conn.execute(
310            "INSERT INTO code_calls
311             (project_id, caller_symbol_id, callee_symbol_id, callee_name, \
312              callee_target_kind, callee_external_module, file_path, line)
313             VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
314             ON CONFLICT (
315                project_id, caller_symbol_id, callee_symbol_id, callee_name,
316                callee_target_kind, callee_external_module, file_path, line
317             ) DO NOTHING",
318            &[
319                &project_id,
320                &call.caller_symbol_id,
321                &call.callee_symbol_id.as_deref().unwrap_or(""),
322                &call.callee_name,
323                &call.callee_target_kind.as_str(),
324                &call.callee_external_module.as_deref().unwrap_or(""),
325                &call.file_path,
326                &to_i32(call.line),
327            ],
328        )?;
329    }
330    Ok(calls.len())
331}
332
/// Narrows a `usize` to `i32`, saturating at `i32::MAX` instead of wrapping.
fn to_i32(value: usize) -> i32 {
    const CEILING: usize = i32::MAX as usize;

    if value > CEILING {
        i32::MAX
    } else {
        // In range by the check above, so the cast cannot truncate.
        value as i32
    }
}