1use postgres::GenericClient;
2use serde::{Deserialize, Serialize};
3
4pub use crate::index::indexer::{
5 IndexDegradation, IndexDurations, IndexOutcome, IndexRequest, index_files,
6};
7
8use crate::models::{
9 CallRelation, ContentChunk, ImportRelation, IndexedFile, IndexedProject, Symbol,
10};
11
/// Maximum number of symbols sent in a single batched statement by
/// `upsert_symbols`, which splits its input into slices of at most this length.
const SYMBOL_UPSERT_BATCH_SIZE: usize = 500;
13
/// Serializable record naming one file of a project together with four
/// per-kind `usize` values.
///
/// NOTE(review): nothing in this part of the file constructs or consumes this
/// type; the numeric fields look like item counts for the file — confirm
/// against the callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteRequest {
    /// Identifier of the project the file belongs to.
    pub project_id: String,
    /// Path of the file the facts refer to.
    pub file_path: String,
    /// Presumably the number of symbols to write for the file — TODO confirm.
    pub symbols: usize,
    /// Presumably the number of import relations to write — TODO confirm.
    pub imports: usize,
    /// Presumably the number of call relations to write — TODO confirm.
    pub calls: usize,
    /// Presumably the number of content chunks to write — TODO confirm.
    pub chunks: usize,
}
23
/// Serializable tally of what a code-fact write produced.
///
/// The derived `Default` yields all-zero counters and both flags `false`;
/// `CodeFactWriteSummary::for_file` builds the value for a single file.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteSummary {
    /// Number of files covered by this summary (`for_file` sets it to 1).
    pub files_written: usize,
    /// Symbol count reported for the write.
    pub symbols_written: usize,
    /// Import-relation count reported for the write.
    pub imports_written: usize,
    /// Call-relation count reported for the write.
    pub calls_written: usize,
    /// Content-chunk count reported for the write.
    pub chunks_written: usize,
    /// Set to `true` by `for_file`.
    /// NOTE(review): presumably signals that a graph store still has to be
    /// synchronized with the written rows — confirm.
    pub graph_sync_pending: bool,
    /// Set to `true` by `for_file`.
    /// NOTE(review): presumably signals that vector data still has to be
    /// synchronized with the written rows — confirm.
    pub vectors_sync_pending: bool,
}
34
35impl CodeFactWriteSummary {
36 pub fn for_file(symbols: usize, imports: usize, calls: usize, chunks: usize) -> Self {
37 Self {
38 files_written: 1,
39 symbols_written: symbols,
40 imports_written: imports,
41 calls_written: calls,
42 chunks_written: chunks,
43 graph_sync_pending: true,
44 vectors_sync_pending: true,
45 }
46 }
47}
48
49pub fn delete_file_facts(
50 conn: &mut impl GenericClient,
51 project_id: &str,
52 file_path: &str,
53) -> anyhow::Result<()> {
54 conn.execute(
55 "DELETE FROM code_symbols WHERE project_id = $1 AND file_path = $2",
56 &[&project_id, &file_path],
57 )?;
58 conn.execute(
59 "DELETE FROM code_indexed_files WHERE project_id = $1 AND file_path = $2",
60 &[&project_id, &file_path],
61 )?;
62 conn.execute(
63 "DELETE FROM code_content_chunks WHERE project_id = $1 AND file_path = $2",
64 &[&project_id, &file_path],
65 )?;
66 conn.execute(
67 "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
68 &[&project_id, &file_path],
69 )?;
70 conn.execute(
71 "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
72 &[&project_id, &file_path],
73 )?;
74 Ok(())
75}
76
77pub fn upsert_symbols(conn: &mut impl GenericClient, symbols: &[Symbol]) -> anyhow::Result<usize> {
78 for chunk in symbols.chunks(SYMBOL_UPSERT_BATCH_SIZE) {
79 let ids = chunk.iter().map(|sym| sym.id.clone()).collect::<Vec<_>>();
80 let project_ids = chunk
81 .iter()
82 .map(|sym| sym.project_id.clone())
83 .collect::<Vec<_>>();
84 let file_paths = chunk
85 .iter()
86 .map(|sym| sym.file_path.clone())
87 .collect::<Vec<_>>();
88 let names = chunk.iter().map(|sym| sym.name.clone()).collect::<Vec<_>>();
89 let qualified_names = chunk
90 .iter()
91 .map(|sym| sym.qualified_name.clone())
92 .collect::<Vec<_>>();
93 let kinds = chunk.iter().map(|sym| sym.kind.clone()).collect::<Vec<_>>();
94 let languages = chunk
95 .iter()
96 .map(|sym| sym.language.clone())
97 .collect::<Vec<_>>();
98 let byte_starts = chunk
99 .iter()
100 .map(|sym| to_i32(sym.byte_start))
101 .collect::<Vec<_>>();
102 let byte_ends = chunk
103 .iter()
104 .map(|sym| to_i32(sym.byte_end))
105 .collect::<Vec<_>>();
106 let line_starts = chunk
107 .iter()
108 .map(|sym| to_i32(sym.line_start))
109 .collect::<Vec<_>>();
110 let line_ends = chunk
111 .iter()
112 .map(|sym| to_i32(sym.line_end))
113 .collect::<Vec<_>>();
114 let signatures = chunk
115 .iter()
116 .map(|sym| sym.signature.clone())
117 .collect::<Vec<_>>();
118 let docstrings = chunk
119 .iter()
120 .map(|sym| sym.docstring.clone())
121 .collect::<Vec<_>>();
122 let parent_symbol_ids = chunk
123 .iter()
124 .map(|sym| sym.parent_symbol_id.clone())
125 .collect::<Vec<_>>();
126 let content_hashes = chunk
127 .iter()
128 .map(|sym| sym.content_hash.clone())
129 .collect::<Vec<_>>();
130 let summaries = chunk
131 .iter()
132 .map(|sym| sym.summary.clone())
133 .collect::<Vec<_>>();
134
135 conn.execute(
136 "INSERT INTO code_symbols (
137 id, project_id, file_path, name, qualified_name,
138 kind, language, byte_start, byte_end,
139 line_start, line_end, signature, docstring,
140 parent_symbol_id, content_hash, summary,
141 created_at, updated_at
142 )
143 SELECT
144 id, project_id, file_path, name, qualified_name,
145 kind, language, byte_start, byte_end,
146 line_start, line_end, signature, docstring,
147 parent_symbol_id, content_hash, summary,
148 NOW(), NOW()
149 FROM unnest(
150 $1::text[], $2::text[], $3::text[], $4::text[],
151 $5::text[], $6::text[], $7::text[], $8::int4[],
152 $9::int4[], $10::int4[], $11::int4[], $12::text[],
153 $13::text[], $14::text[], $15::text[], $16::text[]
154 ) AS t(
155 id, project_id, file_path, name, qualified_name,
156 kind, language, byte_start, byte_end,
157 line_start, line_end, signature, docstring,
158 parent_symbol_id, content_hash, summary
159 )
160 ON CONFLICT(id) DO UPDATE SET
161 name=excluded.name, qualified_name=excluded.qualified_name,
162 kind=excluded.kind, byte_start=excluded.byte_start,
163 byte_end=excluded.byte_end, line_start=excluded.line_start,
164 line_end=excluded.line_end, signature=excluded.signature,
165 docstring=excluded.docstring, parent_symbol_id=excluded.parent_symbol_id,
166 language=excluded.language, content_hash=excluded.content_hash,
167 summary=CASE WHEN excluded.content_hash != code_symbols.content_hash
168 THEN NULL ELSE code_symbols.summary END,
169 updated_at=NOW()",
170 &[
171 &ids,
172 &project_ids,
173 &file_paths,
174 &names,
175 &qualified_names,
176 &kinds,
177 &languages,
178 &byte_starts,
179 &byte_ends,
180 &line_starts,
181 &line_ends,
182 &signatures,
183 &docstrings,
184 &parent_symbol_ids,
185 &content_hashes,
186 &summaries,
187 ],
188 )?;
189 }
190 Ok(symbols.len())
191}
192
193pub fn upsert_file(conn: &mut impl GenericClient, file: &IndexedFile) -> anyhow::Result<()> {
194 conn.execute(
195 "INSERT INTO code_indexed_files (
196 id, project_id, file_path, language, content_hash,
197 symbol_count, byte_size, graph_synced, vectors_synced,
198 graph_sync_attempted_at, indexed_at
199 ) VALUES ($1,$2,$3,$4,$5,$6,$7,false,false,NULL,NOW())
200 ON CONFLICT(id) DO UPDATE SET
201 content_hash=excluded.content_hash,
202 symbol_count=excluded.symbol_count,
203 byte_size=excluded.byte_size,
204 graph_synced=false,
205 vectors_synced=false,
206 graph_sync_attempted_at=NULL,
207 indexed_at=NOW()",
208 &[
209 &file.id,
210 &file.project_id,
211 &file.file_path,
212 &file.language,
213 &file.content_hash,
214 &to_i32(file.symbol_count),
215 &to_i32(file.byte_size),
216 ],
217 )?;
218 Ok(())
219}
220
221pub fn upsert_content_chunks(
222 conn: &mut impl GenericClient,
223 chunks: &[ContentChunk],
224) -> anyhow::Result<usize> {
225 for chunk in chunks {
226 conn.execute(
227 "INSERT INTO code_content_chunks (
228 id, project_id, file_path, chunk_index,
229 line_start, line_end, content, language, created_at
230 ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW())
231 ON CONFLICT(id) DO UPDATE SET
232 content=excluded.content,
233 line_start=excluded.line_start,
234 line_end=excluded.line_end",
235 &[
236 &chunk.id,
237 &chunk.project_id,
238 &chunk.file_path,
239 &to_i32(chunk.chunk_index),
240 &to_i32(chunk.line_start),
241 &to_i32(chunk.line_end),
242 &chunk.content,
243 &chunk.language,
244 ],
245 )?;
246 }
247 Ok(chunks.len())
248}
249
250pub fn upsert_project_stats(
251 conn: &mut impl GenericClient,
252 project: &IndexedProject,
253) -> anyhow::Result<()> {
254 conn.execute(
255 "INSERT INTO code_indexed_projects (
256 id, root_path, total_files, total_symbols,
257 last_indexed_at, index_duration_ms
258 ) VALUES ($1,$2,$3,$4,NOW(),$5)
259 ON CONFLICT(id) DO UPDATE SET
260 root_path=excluded.root_path,
261 total_files=excluded.total_files,
262 total_symbols=excluded.total_symbols,
263 last_indexed_at=excluded.last_indexed_at,
264 index_duration_ms=excluded.index_duration_ms,
265 updated_at=NOW()",
266 &[
267 &project.id,
268 &project.root_path,
269 &to_i32(project.total_files),
270 &to_i32(project.total_symbols),
271 &to_i32(project.index_duration_ms as usize),
272 ],
273 )?;
274 Ok(())
275}
276
277pub fn upsert_imports(
278 conn: &mut impl GenericClient,
279 project_id: &str,
280 file_path: &str,
281 imports: &[ImportRelation],
282) -> anyhow::Result<usize> {
283 conn.execute(
284 "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
285 &[&project_id, &file_path],
286 )?;
287 for imp in imports {
288 conn.execute(
289 "INSERT INTO code_imports (project_id, source_file, target_module)
290 VALUES ($1, $2, $3)
291 ON CONFLICT (project_id, source_file, target_module) DO NOTHING",
292 &[&project_id, &imp.file_path, &imp.module_name],
293 )?;
294 }
295 Ok(imports.len())
296}
297
298pub fn upsert_calls(
299 conn: &mut impl GenericClient,
300 project_id: &str,
301 file_path: &str,
302 calls: &[CallRelation],
303) -> anyhow::Result<usize> {
304 conn.execute(
305 "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
306 &[&project_id, &file_path],
307 )?;
308 for call in calls {
309 conn.execute(
310 "INSERT INTO code_calls
311 (project_id, caller_symbol_id, callee_symbol_id, callee_name, \
312 callee_target_kind, callee_external_module, file_path, line)
313 VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
314 ON CONFLICT (
315 project_id, caller_symbol_id, callee_symbol_id, callee_name,
316 callee_target_kind, callee_external_module, file_path, line
317 ) DO NOTHING",
318 &[
319 &project_id,
320 &call.caller_symbol_id,
321 &call.callee_symbol_id.as_deref().unwrap_or(""),
322 &call.callee_name,
323 &call.callee_target_kind.as_str(),
324 &call.callee_external_module.as_deref().unwrap_or(""),
325 &call.file_path,
326 &to_i32(call.line),
327 ],
328 )?;
329 }
330 Ok(calls.len())
331}
332
/// Converts `value` to `i32`, saturating at `i32::MAX` when it does not fit.
fn to_i32(value: usize) -> i32 {
    // `usize` is never negative, so the only way the conversion can fail is
    // overflow past `i32::MAX`.
    i32::try_from(value).unwrap_or(i32::MAX)
}