Skip to main content

gobby_code/index/indexer/
freshness_probe.rs

//! Lock-free, hash-free freshness pre-gate.
//!
//! `project_changed_since` answers one question without taking the per-project
//! advisory lock and without hashing any file: has anything under the project
//! root changed since the recorded `last_indexed_at`? Read-time freshness calls
//! this *before* the lock so the common no-change case is cheap and never prints
//! "refresh already running". When it reports a change, the caller falls through
//! to the existing lock + incremental reconcile, which is exactly as correct as
//! before.
10
11use std::path::Path;
12use std::time::{Duration, SystemTime};
13
14use crate::index::walker;
15
16use super::util::DEFAULT_EXCLUDES;
17
/// Safety margin for clock skew and coarse mtime granularity.
///
/// `project_changed_since` subtracts this from `last_indexed_at` before
/// comparing file mtimes, so a file modified up to this long *before* the
/// recorded index time still counts as changed. The gate therefore errs toward
/// refreshing: it tolerates host-vs-PostgreSQL (docker) clock skew and
/// same-second mtime granularity up to the margin. Skew larger than the margin
/// is left to the periodic maintenance full re-hash sweep.
const SKEW_MARGIN: Duration = Duration::from_secs(2);
24
25/// Returns `true` if any discovered file is newer than `last_indexed_at` (a
26/// modify or add) or any previously indexed path no longer exists on disk (a
27/// delete or rename), and `false` only when the on-disk tree still matches the
28/// recorded index. A `false` result lets the caller skip the advisory lock and
29/// the full re-hash entirely.
30///
31/// Discovery mirrors the indexer (`walker::discover_files` with
32/// `DEFAULT_EXCLUDES`), so the `.gobby/plans/**/*.md` allowlist and every other
33/// exclusion stay in lockstep with what actually gets indexed — including the
34/// internal `.gobby/plans/*.md` edits the daemon trigger never forwards.
35/// Short-circuits on the first sign of change.
36pub fn project_changed_since(
37    project_root: &Path,
38    last_indexed_at: SystemTime,
39    indexed_paths: &[String],
40) -> bool {
41    let threshold = last_indexed_at
42        .checked_sub(SKEW_MARGIN)
43        .unwrap_or(last_indexed_at);
44
45    let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
46    let (candidates, content_only) = walker::discover_files(project_root, &excludes);
47
48    // Modify / add: a discovered file whose mtime is newer than the threshold.
49    // A freshly added file also carries a recent mtime, so adds are caught here
50    // without a fragile path-set diff. An unreadable mtime is treated as a
51    // change, so we never skip a refresh for a file we cannot stat.
52    for path in candidates.iter().chain(content_only.iter()) {
53        match path.metadata().and_then(|meta| meta.modified()) {
54            Ok(modified) if modified <= threshold => {}
55            _ => return true,
56        }
57    }
58
59    // Delete / rename: a path recorded in the index that is gone from disk.
60    indexed_paths
61        .iter()
62        .any(|rel| !project_root.join(rel).exists())
63}
64
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::File;

    /// The fixed reference instant shifted forward by `offset_secs`.
    ///
    /// The reference is a whole number of seconds well in the past, so the
    /// margin subtraction never underflows and filesystems with 1-second mtime
    /// granularity round-trip it exactly.
    fn base_plus(offset_secs: u64) -> SystemTime {
        SystemTime::UNIX_EPOCH
            + Duration::from_secs(1_700_000_000)
            + Duration::from_secs(offset_secs)
    }

    /// Creates `rel` under `root` (making parent directories as needed), fills
    /// it with `contents`, and stamps it with the given modification time.
    fn plant(root: &Path, rel: &str, contents: &[u8], mtime: SystemTime) {
        let target = root.join(rel);
        if let Some(dir) = target.parent() {
            std::fs::create_dir_all(dir).expect("create parent");
        }
        std::fs::write(&target, contents).expect("write file");

        let handle = File::options()
            .write(true)
            .open(&target)
            .expect("open file to set mtime");
        handle.set_modified(mtime).expect("set mtime");
    }

    /// Builds the owned relative-path list the probe expects.
    fn indexed(rels: &[&str]) -> Vec<String> {
        rels.iter().map(|rel| rel.to_string()).collect()
    }

    #[test]
    fn reports_no_change_when_everything_predates_last_index() {
        let project = tempfile::tempdir().expect("tempdir");
        plant(project.path(), "src/lib.rs", b"fn main() {}\n", base_plus(0));
        plant(project.path(), "README.md", b"# Title\n", base_plus(0));

        // The index was recorded an hour after every file's mtime.
        let changed = project_changed_since(
            project.path(),
            base_plus(3600),
            &indexed(&["src/lib.rs", "README.md"]),
        );

        assert!(!changed);
    }

    #[test]
    fn reports_change_when_a_file_is_modified_after_last_index() {
        let project = tempfile::tempdir().expect("tempdir");
        plant(
            project.path(),
            "src/lib.rs",
            b"fn main() {}\n",
            base_plus(7200),
        );

        let changed =
            project_changed_since(project.path(), base_plus(3600), &indexed(&["src/lib.rs"]));

        assert!(changed);
    }

    #[test]
    fn reports_change_for_newly_added_file() {
        // The file is absent from the indexed set, but its mtime is newer than
        // the index time, so the modify/add scan alone reports the change.
        let project = tempfile::tempdir().expect("tempdir");
        plant(
            project.path(),
            "src/new.rs",
            b"fn added() {}\n",
            base_plus(7200),
        );

        let changed = project_changed_since(project.path(), base_plus(3600), &indexed(&[]));

        assert!(changed);
    }

    #[test]
    fn reports_change_when_indexed_file_is_deleted() {
        let project = tempfile::tempdir().expect("tempdir");
        plant(project.path(), "src/lib.rs", b"fn main() {}\n", base_plus(0));

        // "src/gone.rs" is recorded in the index but was never created on disk.
        let changed = project_changed_since(
            project.path(),
            base_plus(3600),
            &indexed(&["src/lib.rs", "src/gone.rs"]),
        );

        assert!(changed);
    }

    #[test]
    fn skew_margin_boundary_only_ever_makes_the_gate_more_eager() {
        let project = tempfile::tempdir().expect("tempdir");
        let mtime = base_plus(0);
        plant(project.path(), "src/lib.rs", b"fn main() {}\n", mtime);
        let recorded = indexed(&["src/lib.rs"]);

        // (gap between the file's mtime and last_indexed_at, expected verdict)
        let cases = [
            // Inside the margin: threshold = mtime - 1s < mtime, so refresh.
            (Duration::from_secs(1), true),
            // Exactly at the margin: threshold == mtime, which counts as unchanged.
            (SKEW_MARGIN, false),
            // Beyond the margin: threshold = mtime + 1s >= mtime, so skip.
            (Duration::from_secs(3), false),
        ];

        for (gap, expect_changed) in cases {
            let last_indexed_at = mtime + gap;
            assert_eq!(
                project_changed_since(project.path(), last_indexed_at, &recorded),
                expect_changed,
                "gap of {gap:?} between mtime and last_indexed_at"
            );
        }
    }
}