gobby_code/index/indexer/
lifecycle.rs1use std::collections::{HashMap, HashSet};
2use std::path::Path;
3
4use postgres::Client;
5
6use crate::config::Context;
7use crate::graph::code_graph;
8use crate::index::{api, hasher};
9use crate::models::IndexedProject;
10use crate::projection::sync::{self, ProjectionSyncRequest, ProjectionTarget};
11use crate::vector::code_symbols;
12
13use super::types::{IndexDegradation, IndexOutcome, IndexRequest};
14use super::util::{epoch_secs_str, relative_path};
15
16pub(super) fn cleanup_deleted_file_projections(
17 ctx: &Context,
18 file_path: &str,
19 outcome: &mut IndexOutcome,
20 file_vectors_synced: Option<bool>,
21) {
22 if let Err(error) = code_graph::delete_file_projection(ctx, file_path) {
23 push_projection_cleanup_degradation(
24 outcome,
25 file_path,
26 ProjectionTarget::Graph,
27 error.to_string(),
28 );
29 }
30
31 match ctx.qdrant.as_ref() {
32 Some(qdrant) => {
33 if let Err(error) =
34 code_symbols::delete_file_vectors(qdrant, &ctx.project_id, file_path)
35 {
36 push_projection_cleanup_degradation(
37 outcome,
38 file_path,
39 ProjectionTarget::Vectors,
40 error.to_string(),
41 );
42 }
43 }
44 None if file_vectors_synced == Some(true) => {
45 push_projection_cleanup_degradation(
46 outcome,
47 file_path,
48 ProjectionTarget::Vectors,
49 "Qdrant config is required for deleted-file vector cleanup".to_string(),
50 );
51 }
52 None => {}
53 }
54}
55
56fn push_projection_cleanup_degradation(
57 outcome: &mut IndexOutcome,
58 file_path: &str,
59 target: ProjectionTarget,
60 message: String,
61) {
62 outcome
63 .degraded
64 .push(IndexDegradation::ProjectionCleanupFailed {
65 file_path: file_path.to_string(),
66 target,
67 message,
68 });
69}
70
71pub(super) fn attach_projection_sync(outcome: &mut IndexOutcome, request: &IndexRequest) {
72 if !request.sync_projections {
73 return;
74 }
75
76 outcome.projection_sync = Some(sync::pending_after_code_fact_write(ProjectionSyncRequest {
77 project_id: outcome.project_id.clone(),
78 file_paths: outcome.indexed_file_paths.clone(),
79 targets: vec![ProjectionTarget::Graph, ProjectionTarget::Vectors],
80 }));
81}
82
83pub fn invalidate(
85 conn: &mut Client,
86 project_id: &str,
87 daemon_url: Option<&str>,
88) -> anyhow::Result<()> {
89 let mut tx = conn.transaction()?;
90 tx.execute(
91 "DELETE FROM code_calls WHERE project_id = $1",
92 &[&project_id],
93 )?;
94 tx.execute(
95 "DELETE FROM code_imports WHERE project_id = $1",
96 &[&project_id],
97 )?;
98 tx.execute(
99 "DELETE FROM code_content_chunks WHERE project_id = $1",
100 &[&project_id],
101 )?;
102 tx.execute(
103 "DELETE FROM code_indexed_files WHERE project_id = $1",
104 &[&project_id],
105 )?;
106 tx.execute(
107 "DELETE FROM code_symbols WHERE project_id = $1",
108 &[&project_id],
109 )?;
110 tx.execute(
111 "DELETE FROM code_indexed_projects WHERE id = $1",
112 &[&project_id],
113 )?;
114 tx.commit()?;
115 if let Some(url) = daemon_url {
116 notify_daemon_invalidate(url, project_id);
117 }
118 eprintln!("Invalidated code index for project {project_id}");
119
120 Ok(())
121}
122
123fn notify_daemon_invalidate(base_url: &str, project_id: &str) {
126 let client = match reqwest::blocking::Client::builder()
127 .timeout(std::time::Duration::from_secs(1))
128 .build()
129 {
130 Ok(c) => c,
131 Err(error) => {
132 eprintln!("Warning: could not build daemon invalidate HTTP client: {error}");
133 return;
134 }
135 };
136
137 let base = base_url.trim_end_matches('/');
138 let url = format!("{base}/api/code-index/invalidate");
139 match client
140 .post(&url)
141 .json(&serde_json::json!({"project_id": project_id}))
142 .send()
143 {
144 Ok(resp) if !resp.status().is_success() => {
145 eprintln!("Warning: daemon invalidate returned {}", resp.status());
146 }
147 Err(e) => {
148 eprintln!("Warning: could not notify daemon: {e}");
149 }
150 _ => {}
151 }
152}
153
154pub(super) fn refresh_project_stats(
155 conn: &mut Client,
156 root_path: &Path,
157 project_id: &str,
158 elapsed_ms: u64,
159 total_eligible_files: Option<usize>,
160) {
161 let total_files = count_rows(conn, "code_indexed_files", project_id);
162 let total_symbols = count_rows(conn, "code_symbols", project_id);
163
164 if let Err(error) = api::upsert_project_stats(
165 conn,
166 &IndexedProject {
167 id: project_id.to_string(),
168 root_path: root_path.to_string_lossy().to_string(),
169 total_files,
170 total_symbols,
171 last_indexed_at: epoch_secs_str(),
172 index_duration_ms: elapsed_ms,
173 total_eligible_files,
174 },
175 ) {
176 eprintln!(
177 "Warning: refresh_project_stats failed to upsert project stats for project {project_id} at {}: {error}",
178 root_path.display()
179 );
180 }
181}
182
183pub(super) fn get_stale_files(
184 conn: &mut Client,
185 project_id: &str,
186 current_hashes: &HashMap<String, String>,
187) -> Result<HashSet<String>, postgres::Error> {
188 let mut stale = HashSet::new();
189 let mut indexed = HashMap::new();
190 let rows = conn
191 .query(
192 "SELECT file_path, content_hash FROM code_indexed_files WHERE project_id = $1",
193 &[&project_id],
194 )
195 .map_err(|error| {
196 log::error!(
197 "failed to query indexed files for stale detection for project {project_id}: {error}"
198 );
199 error
200 })?;
201 for row in rows {
202 let file_path = match row.try_get::<_, String>("file_path") {
203 Ok(file_path) => file_path,
204 Err(error) => {
205 log::warn!(
206 "skipping malformed indexed-file stale-detection row for project {project_id}: file_path: {error}"
207 );
208 continue;
209 }
210 };
211 let content_hash = match row.try_get::<_, String>("content_hash") {
212 Ok(content_hash) => content_hash,
213 Err(error) => {
214 log::warn!(
215 "skipping malformed indexed-file stale-detection row for project {project_id}, file {file_path}: content_hash: {error}"
216 );
217 continue;
218 }
219 };
220 indexed.insert(file_path, content_hash);
221 }
222
223 for (path, hash) in current_hashes {
224 if indexed.get(path) != Some(hash) {
225 stale.insert(path.clone());
226 }
227 }
228 Ok(stale)
229}
230
/// Snapshot of the files currently on disk, as assembled by `current_file_state`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub(super) struct CurrentFileState {
    /// Root-relative path -> content hash, for every file that could be hashed.
    pub(super) hashes: HashMap<String, String>,
    /// Root-relative paths of all files seen, including those whose hashing failed.
    pub(super) present_paths: HashSet<String>,
}
236
237pub(super) fn current_file_state(
238 root_path: &Path,
239 candidates: &[std::path::PathBuf],
240 content_only: &[std::path::PathBuf],
241) -> CurrentFileState {
242 let mut state = CurrentFileState::default();
243 for path in candidates.iter().chain(content_only.iter()) {
244 if let Ok(rel) = relative_path(path, root_path) {
245 state.present_paths.insert(rel.clone());
246 match hasher::file_content_hash(path) {
247 Ok(hash) => {
248 state.hashes.insert(rel, hash);
249 }
250 Err(error) => {
251 eprintln!(
252 "Warning: failed to hash {} for incremental index detection: {error}",
253 path.display()
254 );
255 }
256 }
257 }
258 }
259 state
260}
261
262pub(super) fn get_orphan_files(
263 conn: &mut Client,
264 project_id: &str,
265 present_paths: &HashSet<String>,
266) -> Result<Vec<String>, postgres::Error> {
267 let mut orphans = Vec::new();
268 let rows = conn
269 .query(
270 "SELECT file_path FROM code_indexed_files WHERE project_id = $1",
271 &[&project_id],
272 )
273 .map_err(|error| {
274 log::error!(
275 "failed to query indexed files for orphan detection for project {project_id}: {error}"
276 );
277 error
278 })?;
279 for row in rows {
280 let file_path = match row.try_get::<_, String>("file_path") {
281 Ok(file_path) => file_path,
282 Err(error) => {
283 log::warn!(
284 "skipping malformed indexed-file orphan-detection row for project {project_id}: file_path: {error}"
285 );
286 continue;
287 }
288 };
289 if !present_paths.contains(&file_path) {
290 orphans.push(file_path);
291 }
292 }
293 Ok(orphans)
294}
295
296fn count_rows(conn: &mut Client, table: &str, project_id: &str) -> usize {
297 if !matches!(table, "code_indexed_files" | "code_symbols") {
298 return 0;
299 }
300 let sql = format!("SELECT COUNT(*)::BIGINT AS count FROM {table} WHERE project_id = $1");
301 conn.query_one(&sql, &[&project_id])
302 .ok()
303 .and_then(|row| row.try_get::<_, i64>("count").ok())
304 .unwrap_or(0) as usize
305}