1use postgres::GenericClient;
2use serde::{Deserialize, Serialize};
3
4pub use crate::index::indexer::{
5 IndexDegradation, IndexDurations, IndexOutcome, IndexRequest, UnsupportedFileType, index_files,
6 project_changed_since,
7};
8
9use crate::models::{
10 CallRelation, ContentChunk, ImportRelation, IndexedFile, IndexedProject, Symbol,
11};
12
/// Maximum number of symbols sent to the database per `INSERT` statement by
/// [`upsert_symbols`], which splits its input with `chunks` of this size.
const SYMBOL_UPSERT_BATCH_SIZE: usize = 500;
14
/// Describes a code-fact write for one file: the project and file it targets
/// plus one count per fact category.
///
/// NOTE(review): nothing in this file constructs or reads this type, so whether
/// the counts mean "requested" or "already written" must be confirmed against
/// its callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteRequest {
    /// Project identifier (presumably the same `project_id` used as the key in
    /// the `code_*` tables — confirm against callers).
    pub project_id: String,
    /// Path of the file the facts belong to.
    pub file_path: String,
    /// Number of symbols for the file.
    pub symbols: usize,
    /// Number of import relations for the file.
    pub imports: usize,
    /// Number of call relations for the file.
    pub calls: usize,
    /// Number of content chunks for the file.
    pub chunks: usize,
}
24
/// Totals describing a code-fact write, plus two flags marking follow-up
/// synchronization as outstanding.
///
/// The derived `Default` yields all-zero counts with both flags `false`;
/// [`CodeFactWriteSummary::for_file`] builds the single-file variant.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct CodeFactWriteSummary {
    /// Number of files written.
    pub files_written: usize,
    /// Number of symbols written.
    pub symbols_written: usize,
    /// Number of import relations written.
    pub imports_written: usize,
    /// Number of call relations written.
    pub calls_written: usize,
    /// Number of content chunks written.
    pub chunks_written: usize,
    /// Set when graph synchronization is still outstanding.
    /// NOTE(review): the consumer of this flag is not visible in this file.
    pub graph_sync_pending: bool,
    /// Set when vector synchronization is still outstanding.
    /// NOTE(review): the consumer of this flag is not visible in this file.
    pub vectors_sync_pending: bool,
}
35
36impl CodeFactWriteSummary {
37 pub fn for_file(symbols: usize, imports: usize, calls: usize, chunks: usize) -> Self {
38 Self {
39 files_written: 1,
40 symbols_written: symbols,
41 imports_written: imports,
42 calls_written: calls,
43 chunks_written: chunks,
44 graph_sync_pending: true,
45 vectors_sync_pending: true,
46 }
47 }
48}
49
50pub fn delete_file_facts(
51 conn: &mut impl GenericClient,
52 project_id: &str,
53 file_path: &str,
54) -> anyhow::Result<()> {
55 conn.execute(
56 "DELETE FROM code_symbols WHERE project_id = $1 AND file_path = $2",
57 &[&project_id, &file_path],
58 )?;
59 conn.execute(
60 "DELETE FROM code_indexed_files WHERE project_id = $1 AND file_path = $2",
61 &[&project_id, &file_path],
62 )?;
63 conn.execute(
64 "DELETE FROM code_content_chunks WHERE project_id = $1 AND file_path = $2",
65 &[&project_id, &file_path],
66 )?;
67 conn.execute(
68 "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
69 &[&project_id, &file_path],
70 )?;
71 conn.execute(
72 "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
73 &[&project_id, &file_path],
74 )?;
75 Ok(())
76}
77
78pub fn file_facts_exist(
79 conn: &mut impl GenericClient,
80 project_id: &str,
81 file_path: &str,
82) -> anyhow::Result<bool> {
83 let row = conn.query_one(
84 "SELECT
85 EXISTS(SELECT 1 FROM code_indexed_files WHERE project_id = $1 AND file_path = $2)
86 OR EXISTS(SELECT 1 FROM code_symbols WHERE project_id = $1 AND file_path = $2)
87 OR EXISTS(SELECT 1 FROM code_content_chunks WHERE project_id = $1 AND file_path = $2)
88 OR EXISTS(SELECT 1 FROM code_imports WHERE project_id = $1 AND source_file = $2)
89 OR EXISTS(SELECT 1 FROM code_calls WHERE project_id = $1 AND file_path = $2)",
90 &[&project_id, &file_path],
91 )?;
92 Ok(row.try_get(0)?)
93}
94
95pub fn upsert_symbols(conn: &mut impl GenericClient, symbols: &[Symbol]) -> anyhow::Result<usize> {
96 for chunk in symbols.chunks(SYMBOL_UPSERT_BATCH_SIZE) {
97 let ids = chunk.iter().map(|sym| sym.id.clone()).collect::<Vec<_>>();
98 let project_ids = chunk
99 .iter()
100 .map(|sym| sym.project_id.clone())
101 .collect::<Vec<_>>();
102 let file_paths = chunk
103 .iter()
104 .map(|sym| sym.file_path.clone())
105 .collect::<Vec<_>>();
106 let names = chunk.iter().map(|sym| sym.name.clone()).collect::<Vec<_>>();
107 let qualified_names = chunk
108 .iter()
109 .map(|sym| sym.qualified_name.clone())
110 .collect::<Vec<_>>();
111 let kinds = chunk.iter().map(|sym| sym.kind.clone()).collect::<Vec<_>>();
112 let languages = chunk
113 .iter()
114 .map(|sym| sym.language.clone())
115 .collect::<Vec<_>>();
116 let byte_starts = chunk
117 .iter()
118 .map(|sym| to_i32(sym.byte_start))
119 .collect::<Vec<_>>();
120 let byte_ends = chunk
121 .iter()
122 .map(|sym| to_i32(sym.byte_end))
123 .collect::<Vec<_>>();
124 let line_starts = chunk
125 .iter()
126 .map(|sym| to_i32(sym.line_start))
127 .collect::<Vec<_>>();
128 let line_ends = chunk
129 .iter()
130 .map(|sym| to_i32(sym.line_end))
131 .collect::<Vec<_>>();
132 let signatures = chunk
133 .iter()
134 .map(|sym| sym.signature.clone())
135 .collect::<Vec<_>>();
136 let docstrings = chunk
137 .iter()
138 .map(|sym| sym.docstring.clone())
139 .collect::<Vec<_>>();
140 let parent_symbol_ids = chunk
141 .iter()
142 .map(|sym| sym.parent_symbol_id.clone())
143 .collect::<Vec<_>>();
144 let content_hashes = chunk
145 .iter()
146 .map(|sym| sym.content_hash.clone())
147 .collect::<Vec<_>>();
148 let summaries = chunk
149 .iter()
150 .map(|sym| sym.summary.clone())
151 .collect::<Vec<_>>();
152
153 conn.execute(
154 "INSERT INTO code_symbols (
155 id, project_id, file_path, name, qualified_name,
156 kind, language, byte_start, byte_end,
157 line_start, line_end, signature, docstring,
158 parent_symbol_id, content_hash, summary,
159 created_at, updated_at
160 )
161 SELECT
162 id, project_id, file_path, name, qualified_name,
163 kind, language, byte_start, byte_end,
164 line_start, line_end, signature, docstring,
165 parent_symbol_id, content_hash, summary,
166 NOW(), NOW()
167 FROM unnest(
168 $1::text[], $2::text[], $3::text[], $4::text[],
169 $5::text[], $6::text[], $7::text[], $8::int4[],
170 $9::int4[], $10::int4[], $11::int4[], $12::text[],
171 $13::text[], $14::text[], $15::text[], $16::text[]
172 ) AS t(
173 id, project_id, file_path, name, qualified_name,
174 kind, language, byte_start, byte_end,
175 line_start, line_end, signature, docstring,
176 parent_symbol_id, content_hash, summary
177 )
178 ON CONFLICT(id) DO UPDATE SET
179 name=excluded.name, qualified_name=excluded.qualified_name,
180 kind=excluded.kind, byte_start=excluded.byte_start,
181 byte_end=excluded.byte_end, line_start=excluded.line_start,
182 line_end=excluded.line_end, signature=excluded.signature,
183 docstring=excluded.docstring, parent_symbol_id=excluded.parent_symbol_id,
184 language=excluded.language, content_hash=excluded.content_hash,
185 summary=CASE WHEN excluded.content_hash != code_symbols.content_hash
186 THEN NULL ELSE code_symbols.summary END,
187 updated_at=NOW()",
188 &[
189 &ids,
190 &project_ids,
191 &file_paths,
192 &names,
193 &qualified_names,
194 &kinds,
195 &languages,
196 &byte_starts,
197 &byte_ends,
198 &line_starts,
199 &line_ends,
200 &signatures,
201 &docstrings,
202 &parent_symbol_ids,
203 &content_hashes,
204 &summaries,
205 ],
206 )?;
207 }
208 Ok(symbols.len())
209}
210
211pub fn upsert_file(conn: &mut impl GenericClient, file: &IndexedFile) -> anyhow::Result<()> {
212 conn.execute(
213 "INSERT INTO code_indexed_files (
214 id, project_id, file_path, language, content_hash,
215 symbol_count, byte_size, graph_synced, vectors_synced,
216 graph_sync_attempted_at, indexed_at
217 ) VALUES ($1,$2,$3,$4,$5,$6,$7,false,false,NULL,NOW())
218 ON CONFLICT(id) DO UPDATE SET
219 content_hash=excluded.content_hash,
220 symbol_count=excluded.symbol_count,
221 byte_size=excluded.byte_size,
222 graph_synced=false,
223 vectors_synced=false,
224 graph_sync_attempted_at=NULL,
225 indexed_at=NOW()",
226 &[
227 &file.id,
228 &file.project_id,
229 &file.file_path,
230 &file.language,
231 &file.content_hash,
232 &to_i32(file.symbol_count),
233 &to_i32(file.byte_size),
234 ],
235 )?;
236 Ok(())
237}
238
239pub fn upsert_content_chunks(
240 conn: &mut impl GenericClient,
241 chunks: &[ContentChunk],
242) -> anyhow::Result<usize> {
243 for chunk in chunks {
244 conn.execute(
245 "INSERT INTO code_content_chunks (
246 id, project_id, file_path, chunk_index,
247 line_start, line_end, content, language, created_at
248 ) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW())
249 ON CONFLICT(id) DO UPDATE SET
250 content=excluded.content,
251 line_start=excluded.line_start,
252 line_end=excluded.line_end",
253 &[
254 &chunk.id,
255 &chunk.project_id,
256 &chunk.file_path,
257 &to_i32(chunk.chunk_index),
258 &to_i32(chunk.line_start),
259 &to_i32(chunk.line_end),
260 &chunk.content,
261 &chunk.language,
262 ],
263 )?;
264 }
265 Ok(chunks.len())
266}
267
268pub fn upsert_project_stats(
269 conn: &mut impl GenericClient,
270 project: &IndexedProject,
271) -> anyhow::Result<()> {
272 conn.execute(
273 "INSERT INTO code_indexed_projects (
274 id, root_path, total_files, total_symbols,
275 last_indexed_at, index_duration_ms
276 ) VALUES ($1,$2,$3,$4,NOW(),$5)
277 ON CONFLICT(id) DO UPDATE SET
278 root_path=excluded.root_path,
279 total_files=excluded.total_files,
280 total_symbols=excluded.total_symbols,
281 last_indexed_at=excluded.last_indexed_at,
282 index_duration_ms=excluded.index_duration_ms,
283 updated_at=NOW()",
284 &[
285 &project.id,
286 &project.root_path,
287 &to_i32(project.total_files),
288 &to_i32(project.total_symbols),
289 &to_i32(project.index_duration_ms as usize),
290 ],
291 )?;
292 Ok(())
293}
294
295pub fn upsert_imports(
296 conn: &mut impl GenericClient,
297 project_id: &str,
298 file_path: &str,
299 imports: &[ImportRelation],
300) -> anyhow::Result<usize> {
301 conn.execute(
302 "DELETE FROM code_imports WHERE project_id = $1 AND source_file = $2",
303 &[&project_id, &file_path],
304 )?;
305 for imp in imports {
306 conn.execute(
307 "INSERT INTO code_imports (project_id, source_file, target_module)
308 VALUES ($1, $2, $3)
309 ON CONFLICT (project_id, source_file, target_module) DO NOTHING",
310 &[&project_id, &imp.file_path, &imp.module_name],
311 )?;
312 }
313 Ok(imports.len())
314}
315
316pub fn upsert_calls(
317 conn: &mut impl GenericClient,
318 project_id: &str,
319 file_path: &str,
320 calls: &[CallRelation],
321) -> anyhow::Result<usize> {
322 conn.execute(
323 "DELETE FROM code_calls WHERE project_id = $1 AND file_path = $2",
324 &[&project_id, &file_path],
325 )?;
326 for call in calls {
327 conn.execute(
328 "INSERT INTO code_calls
329 (project_id, caller_symbol_id, callee_symbol_id, callee_name, \
330 callee_target_kind, callee_external_module, file_path, line)
331 VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
332 ON CONFLICT (
333 project_id, caller_symbol_id, callee_symbol_id, callee_name,
334 callee_target_kind, callee_external_module, file_path, line
335 ) DO NOTHING",
336 &[
337 &project_id,
338 &call.caller_symbol_id,
339 &call.callee_symbol_id.as_deref().unwrap_or(""),
340 &call.callee_name,
341 &call.callee_target_kind.as_str(),
342 &call.callee_external_module.as_deref().unwrap_or(""),
343 &call.file_path,
344 &to_i32(call.line),
345 ],
346 )?;
347 }
348 Ok(calls.len())
349}
350
/// Converts `value` to `i32`, saturating at `i32::MAX` when it does not fit.
fn to_i32(value: usize) -> i32 {
    i32::try_from(value).unwrap_or(i32::MAX)
}