Skip to main content

gobby_code/
freshness.rs

1use std::path::{Path, PathBuf};
2use std::time::SystemTime;
3
4use crate::config::Context;
5use crate::db;
6use crate::index::{api, hasher};
7use crate::index_lock::{self, IndexLockPolicy, IndexLockResult};
8use crate::models::Symbol;
9use crate::visibility;
10
/// Name of the environment variable that marks a freshness refresh as already
/// in flight in this process. While it is set, `ensure_fresh` and
/// `ensure_symbol_fresh` return immediately instead of starting another
/// refresh; `FreshnessGuard` sets it on entry and removes it on drop.
const INFLIGHT_ENV: &str = "GCODE_FRESHNESS_INFLIGHT";
12
/// Selects how much of the project a freshness check covers.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy and
/// compare scopes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FreshnessScope {
    /// Check the whole project.
    Project,
    /// Check only the listed files. Each path is normalized against the
    /// project root before it is handed to the indexer.
    Files(Vec<PathBuf>),
}
17
/// Outcome reported by the freshness entry points.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FreshnessStatus {
    /// The check ran to completion, or was determined to be unnecessary.
    Checked,
    /// The project lock was reported busy, so the refresh was skipped.
    SkippedBusy,
}
23
24pub fn ensure_fresh(ctx: &Context, scope: FreshnessScope) -> anyhow::Result<FreshnessStatus> {
25    if std::env::var_os(INFLIGHT_ENV).is_some() {
26        return Ok(FreshnessStatus::Checked);
27    }
28
29    // Lock-free pre-gate for whole-project reads: if nothing on disk is newer
30    // than the recorded index and nothing was deleted, skip the advisory lock
31    // and the full re-hash entirely (no "refresh already running" warning).
32    // `FreshnessScope::Files` is already cheap (explicit-files path) and is left
33    // untouched.
34    if matches!(scope, FreshnessScope::Project) && !project_needs_refresh(ctx)? {
35        return Ok(FreshnessStatus::Checked);
36    }
37
38    let _guard = FreshnessGuard::enter();
39    let result =
40        index_lock::with_project_lock(ctx, IndexLockPolicy::brief_freshness_try(), || {
41            match scope {
42                FreshnessScope::Project => {
43                    api::index_files(
44                        api::IndexRequest {
45                            project_root: ctx.project_root.clone(),
46                            path_filter: None,
47                            explicit_files: Vec::new(),
48                            full: false,
49                            require_cpp_semantics: false,
50                            sync_projections: false,
51                        },
52                        ctx,
53                    )?;
54                }
55                FreshnessScope::Files(paths) => {
56                    let files: Vec<PathBuf> = paths
57                        .iter()
58                        .map(|path| normalize_file_path(&ctx.project_root, path))
59                        .map(PathBuf::from)
60                        .collect();
61                    if !files.is_empty() {
62                        api::index_files(
63                            api::IndexRequest {
64                                project_root: ctx.project_root.clone(),
65                                path_filter: None,
66                                explicit_files: files,
67                                full: false,
68                                require_cpp_semantics: false,
69                                sync_projections: false,
70                            },
71                            ctx,
72                        )?;
73                    }
74                }
75            }
76            Ok(())
77        })?;
78
79    match result {
80        IndexLockResult::Acquired(()) => Ok(FreshnessStatus::Checked),
81        IndexLockResult::Busy => Ok(FreshnessStatus::SkippedBusy),
82    }
83}
84
85/// Read-only pre-gate for whole-project freshness.
86///
87/// Returns `true` when the project must be reconciled under the advisory lock —
88/// because it has never been indexed or because the on-disk tree changed since
89/// `last_indexed_at` — and `false` when the recorded index is already current
90/// and the lock (plus the full re-hash) can be skipped. Reads only; needs the
91/// hub exactly like the existing refresh path, and propagates a hub error the
92/// same way (`--no-freshness` still bypasses it upstream).
93fn project_needs_refresh(ctx: &Context) -> anyhow::Result<bool> {
94    let mut conn = db::connect_readonly(&ctx.database_url)?;
95
96    let last_indexed_at: Option<SystemTime> = match conn.query_opt(
97        "SELECT last_indexed_at FROM code_indexed_projects WHERE id = $1",
98        &[&ctx.project_id],
99    )? {
100        Some(row) => row.try_get::<_, Option<SystemTime>>(0)?,
101        None => None,
102    };
103
104    // Never indexed (or no recorded timestamp): do not gate; let the existing
105    // refresh path build the first index.
106    let Some(last_indexed_at) = last_indexed_at else {
107        return Ok(true);
108    };
109
110    let indexed_paths = db::list_indexed_file_paths(&mut conn, &ctx.project_id)?;
111    drop(conn);
112
113    Ok(api::project_changed_since(
114        &ctx.project_root,
115        last_indexed_at,
116        &indexed_paths,
117    ))
118}
119
120pub fn ensure_symbol_fresh(ctx: &Context, id: &str) -> anyhow::Result<FreshnessStatus> {
121    if std::env::var_os(INFLIGHT_ENV).is_some() {
122        return Ok(FreshnessStatus::Checked);
123    }
124
125    let mut conn = db::connect_readonly(&ctx.database_url)?;
126    let sym = visibility::visible_symbol_by_id(&mut conn, ctx, id)?;
127    drop(conn);
128
129    let Some(sym) = sym else {
130        return Ok(FreshnessStatus::Checked);
131    };
132
133    if symbol_slice_is_current(ctx, &sym) {
134        return Ok(FreshnessStatus::Checked);
135    }
136
137    ensure_fresh(
138        ctx,
139        FreshnessScope::Files(vec![PathBuf::from(&sym.file_path)]),
140    )
141}
142
143fn symbol_slice_is_current(ctx: &Context, sym: &Symbol) -> bool {
144    if sym.content_hash.is_empty() {
145        return false;
146    }
147
148    let file_path = ctx.project_root.join(&sym.file_path);
149    let source = match std::fs::read(file_path) {
150        Ok(source) => source,
151        Err(_) => return false,
152    };
153
154    hasher::symbol_content_hash(&source, sym.byte_start, sym.byte_end)
155        .map(|hash| hash == sym.content_hash)
156        .unwrap_or(false)
157}
158
/// Converts `path` into the project-relative form, rendered as a string.
///
/// A relative `path` is interpreted against `root`. When the resulting file
/// exists, both it and `root` are canonicalized and the root prefix is
/// stripped. If the root cannot be canonicalized, or the canonical file lies
/// outside it, `path` is returned unchanged.
///
/// When the file cannot be canonicalized (for example because it no longer
/// exists), an absolute `path` that lexically starts with `root` still has
/// that prefix stripped, so missing files normalize the same way as existing
/// ones. Any other path is returned unchanged.
///
/// Non-UTF-8 components are converted lossily.
fn normalize_file_path(root: &Path, path: &Path) -> String {
    let absolute = if path.is_absolute() {
        path.to_path_buf()
    } else {
        root.join(path)
    };

    let normalized = match absolute.canonicalize() {
        Ok(canonical) => root
            .canonicalize()
            .ok()
            .and_then(|canonical_root| {
                canonical
                    .strip_prefix(&canonical_root)
                    .ok()
                    .map(Path::to_path_buf)
            })
            .unwrap_or_else(|| path.to_path_buf()),
        // `canonicalize` needs the file to exist; fall back to a purely
        // lexical prefix strip so an absolute path to a missing file inside
        // the root is not left absolute.
        Err(_) if path.is_absolute() => path.strip_prefix(root).unwrap_or(path).to_path_buf(),
        Err(_) => path.to_path_buf(),
    };

    normalized.to_string_lossy().into_owned()
}
180
181struct FreshnessGuard;
182
183impl FreshnessGuard {
184    fn enter() -> Self {
185        // SAFETY: gcode runs freshness indexing synchronously in this CLI process
186        // and restores the variable before returning to command dispatch.
187        unsafe { std::env::set_var(INFLIGHT_ENV, "1") };
188        Self
189    }
190}
191
192impl Drop for FreshnessGuard {
193    fn drop(&mut self) {
194        // SAFETY: see FreshnessGuard::enter.
195        unsafe { std::env::remove_var(INFLIGHT_ENV) };
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use super::*;
202    use crate::models::CODE_INDEX_UUID_NAMESPACE;
203    use postgres::Client;
204
205    fn context_for(root: &Path) -> Context {
206        Context {
207            database_url: "postgresql://localhost/gobby-test".to_string(),
208            project_root: root.to_path_buf(),
209            project_id: "proj".to_string(),
210            quiet: true,
211            falkordb: None,
212            qdrant: None,
213            embedding: None,
214            code_vectors: crate::config::CodeVectorSettings::default(),
215            daemon_url: None,
216            index_scope: crate::config::ProjectIndexScope::Single,
217        }
218    }
219
220    fn symbol_hash(source: &[u8], start: usize, end: usize) -> String {
221        hasher::symbol_content_hash(source, start, end).expect("symbol hash")
222    }
223
224    fn postgres_test_context(project_id: &str) -> Option<Context> {
225        let database_url = std::env::var("GCODE_POSTGRES_TEST_DATABASE_URL").ok()?;
226        match db::connect_readwrite(&database_url) {
227            Ok(_) => Some(Context {
228                database_url,
229                project_root: std::path::PathBuf::from("/tmp/gcode-freshness-lock-test"),
230                project_id: project_id.to_string(),
231                quiet: true,
232                falkordb: None,
233                qdrant: None,
234                embedding: None,
235                code_vectors: crate::config::CodeVectorSettings::default(),
236                daemon_url: None,
237                index_scope: crate::config::ProjectIndexScope::Single,
238            }),
239            Err(error) => {
240                eprintln!("skipping freshness lock test: PostgreSQL hub is unavailable: {error}");
241                None
242            }
243        }
244    }
245
246    fn postgres_context_with_root(project_id: &str, root: &Path) -> Option<Context> {
247        let database_url = std::env::var("GCODE_POSTGRES_TEST_DATABASE_URL").ok()?;
248        match db::connect_readwrite(&database_url) {
249            Ok(_) => Some(Context {
250                database_url,
251                project_root: root.to_path_buf(),
252                project_id: project_id.to_string(),
253                quiet: true,
254                falkordb: None,
255                qdrant: None,
256                embedding: None,
257                code_vectors: crate::config::CodeVectorSettings::default(),
258                daemon_url: None,
259                index_scope: crate::config::ProjectIndexScope::Single,
260            }),
261            Err(error) => {
262                eprintln!(
263                    "skipping freshness pre-gate test: PostgreSQL hub is unavailable: {error}"
264                );
265                None
266            }
267        }
268    }
269
270    fn hold_project_lock(ctx: &Context) -> Client {
271        let mut conn =
272            db::connect_readwrite(&ctx.database_url).expect("connect test PostgreSQL hub");
273        let key = crate::index_lock::project_lock_key(&ctx.project_id);
274        conn.execute("SELECT pg_advisory_lock($1)", &[&key])
275            .expect("hold project advisory lock");
276        conn
277    }
278
279    fn set_mtime(path: &Path, time: SystemTime) {
280        std::fs::File::options()
281            .read(true)
282            .write(true)
283            .open(path)
284            .expect("open file to set mtime")
285            .set_modified(time)
286            .expect("set mtime");
287    }
288
289    fn invalidate_test_project(ctx: &Context) {
290        let mut conn =
291            db::connect_readwrite(&ctx.database_url).expect("connect test PostgreSQL hub");
292        crate::index::indexer::invalidate(&mut conn, &ctx.project_id, None)
293            .expect("invalidate test project index");
294    }
295
296    fn full_index(ctx: &Context) {
297        api::index_files(
298            api::IndexRequest {
299                project_root: ctx.project_root.clone(),
300                path_filter: None,
301                explicit_files: Vec::new(),
302                full: true,
303                require_cpp_semantics: false,
304                sync_projections: false,
305            },
306            ctx,
307        )
308        .expect("full index of test project");
309    }
310
311    mod serial_db {
312        use super::*;
313
314        #[test]
315        #[serial_test::serial(serial_db)]
316        fn no_freshness_env_short_circuits_project_refresh() {
317            let tmp = tempfile::tempdir().expect("tempdir");
318            let ctx = context_for(tmp.path());
319            unsafe { std::env::set_var(INFLIGHT_ENV, "1") };
320            let result = ensure_fresh(&ctx, FreshnessScope::Project);
321            unsafe { std::env::remove_var(INFLIGHT_ENV) };
322
323            assert_eq!(result.expect("freshness status"), FreshnessStatus::Checked);
324        }
325
326        #[test]
327        #[serial_test::serial(serial_db)]
328        fn busy_project_lock_skips_freshness_refresh() {
329            let Some(ctx) = postgres_test_context("gcode-freshness-busy") else {
330                return;
331            };
332            let _holder = hold_project_lock(&ctx);
333
334            let status = ensure_fresh(&ctx, FreshnessScope::Project).expect("freshness status");
335
336            assert_eq!(status, FreshnessStatus::SkippedBusy);
337        }
338
339        #[test]
340        #[serial_test::serial(serial_db)]
341        fn pre_gate_skips_lock_when_unchanged_and_trips_after_a_change() {
342            let tmp = tempfile::tempdir().expect("tempdir");
343            let root = tmp.path();
344            std::fs::create_dir_all(root.join("src")).expect("create src");
345            let lib = root.join("src/lib.rs");
346            std::fs::write(&lib, b"fn main() {}\n").expect("write lib.rs");
347            std::fs::write(root.join("README.md"), b"# Title\n").expect("write README");
348
349            // Age the files well past the skew margin so a clean index leaves them
350            // unambiguously older than last_indexed_at, regardless of host/hub skew.
351            let aged = SystemTime::now() - std::time::Duration::from_secs(3600);
352            set_mtime(&lib, aged);
353            set_mtime(&root.join("README.md"), aged);
354
355            let Some(ctx) = postgres_context_with_root("gcode-freshness-pregate", root) else {
356                return;
357            };
358
359            // Start clean, then index so code_indexed_projects.last_indexed_at = NOW().
360            invalidate_test_project(&ctx);
361            full_index(&ctx);
362
363            // Nothing changed: the pre-gate must skip the advisory lock entirely,
364            // even while another connection holds it, and report Checked. Without
365            // the gate this would force SkippedBusy.
366            let holder = hold_project_lock(&ctx);
367            let status = ensure_fresh(&ctx, FreshnessScope::Project).expect("freshness status");
368            assert_eq!(status, FreshnessStatus::Checked);
369
370            // Touch a tracked file with a future mtime so the gate trips regardless
371            // of skew; with the lock still held it reports SkippedBusy, proving it
372            // took the lock path rather than skipping.
373            set_mtime(
374                &lib,
375                SystemTime::now() + std::time::Duration::from_secs(3600),
376            );
377            let status = ensure_fresh(&ctx, FreshnessScope::Project).expect("freshness status");
378            assert_eq!(status, FreshnessStatus::SkippedBusy);
379            drop(holder);
380
381            invalidate_test_project(&ctx);
382        }
383
384        #[test]
385        #[serial_test::serial(serial_db)]
386        fn symbol_slice_check_uses_stored_byte_range_hash() {
387            let tmp = tempfile::tempdir().expect("tempdir");
388            let source = b"fn before() {}\nfn target() {}\n";
389            std::fs::write(tmp.path().join("lib.rs"), source).expect("write file");
390            let ctx = context_for(tmp.path());
391            let start = 15;
392            let end = source.len();
393            let sym = Symbol {
394                id: uuid::Uuid::new_v5(&CODE_INDEX_UUID_NAMESPACE, b"sym").to_string(),
395                project_id: "proj".to_string(),
396                file_path: "lib.rs".to_string(),
397                name: "target".to_string(),
398                qualified_name: "target".to_string(),
399                kind: "function".to_string(),
400                language: "rust".to_string(),
401                byte_start: start,
402                byte_end: end,
403                line_start: 2,
404                line_end: 2,
405                signature: None,
406                docstring: None,
407                parent_symbol_id: None,
408                content_hash: symbol_hash(source, start, end),
409                summary: None,
410                created_at: String::new(),
411                updated_at: String::new(),
412            };
413
414            assert!(symbol_slice_is_current(&ctx, &sym));
415
416            std::fs::write(
417                tmp.path().join("lib.rs"),
418                b"// shifted\nfn before() {}\nfn target() {}\n",
419            )
420            .expect("shift file");
421            assert!(!symbol_slice_is_current(&ctx, &sym));
422        }
423    }
424}