Skip to main content

talon_core/indexing/
change_tracking.rs

//! Change tracking types and tombstone management.
//!
//! Tracks file lifecycle: indexed, modified, deleted. Uses mtime comparison
//! and tombstone tables for change detection and `--since` queries.
5use serde::{Deserialize, Serialize};
6use std::collections::BTreeMap;
7
8use crate::numeric::count_u32;
9
10mod time;
11
12pub use time::{TOMBSTONE_RETENTION_MS, now_ms, parse_since};
13
14// ── Change tracking types ───────────────────────────────────────────────────
15
16/// File state in the index.
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
18pub enum FileState {
19    /// File is indexed and present.
20    Active,
21    /// File was deleted but tombstoned (for `--since` queries).
22    Tombstoned,
23}
24
25/// A change entry in the change feed.
26#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
27pub struct ChangeEntry {
28    /// Vault-relative path.
29    pub path: String,
30    /// When the file was last seen/indexed (milliseconds since epoch).
31    pub last_indexed_at: u64,
32    /// File state.
33    pub state: FileState,
34}
35
36/// Change feed response.
37#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
38pub struct ChangeFeed {
39    /// Files newly indexed since the query timestamp.
40    pub added: Vec<ChangeEntry>,
41    /// Files re-indexed (modified) since the query timestamp.
42    pub modified: Vec<ChangeEntry>,
43    /// Files detected as deleted (tombstoned).
44    pub deleted: Vec<ChangeEntry>,
45}
46
47/// Tombstone entry for deleted files.
48#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
49pub struct TombstoneEntry {
50    /// Vault-relative path of the deleted file.
51    pub path: String,
52    /// When the file was detected as deleted (milliseconds since epoch).
53    pub deleted_at: u64,
54    /// When the file was last successfully indexed.
55    pub last_indexed_at: u64,
56}
57
58/// Index metadata stored alongside the database.
59#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
60pub struct IndexMetadata {
61    /// Schema version.
62    pub schema_version: u32,
63    /// When the index was last built (milliseconds since epoch).
64    pub last_indexed_at: u64,
65    /// When the index was last seen (file system check).
66    pub last_seen_at: u64,
67    /// Total number of active notes.
68    pub active_notes: u32,
69    /// Total number of chunks.
70    pub chunk_count: u32,
71    /// Total number of tombstones.
72    pub tombstone_count: u32,
73}
74
75impl Default for IndexMetadata {
76    fn default() -> Self {
77        Self {
78            schema_version: 1,
79            last_indexed_at: 0,
80            last_seen_at: 0,
81            active_notes: 0,
82            chunk_count: 0,
83            tombstone_count: 0,
84        }
85    }
86}
87
88/// Change tracking state for a single file.
89#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
90pub struct FileChangeState {
91    /// Vault-relative path.
92    pub path: String,
93    /// Last indexed timestamp (milliseconds since epoch).
94    pub last_indexed_at: u64,
95    /// Last seen timestamp (milliseconds since epoch).
96    pub last_seen_at: u64,
97    /// File modification time (milliseconds since epoch).
98    pub mtime: u64,
99    /// Whether the file is tombstoned.
100    pub tombstoned: bool,
101    /// When tombstoned, the deletion timestamp.
102    pub tombstoned_at: Option<u64>,
103}
104
105#[allow(clippy::missing_const_for_fn)]
106impl FileChangeState {
107    /// Creates a new active file state.
108    #[must_use]
109    pub fn active(path: String, mtime: u64) -> Self {
110        Self {
111            path,
112            last_indexed_at: 0,
113            last_seen_at: 0,
114            mtime,
115            tombstoned: false,
116            tombstoned_at: None,
117        }
118    }
119
120    /// Marks the file as indexed.
121    pub fn mark_indexed(&mut self, timestamp: u64) {
122        self.last_indexed_at = timestamp;
123        self.last_seen_at = timestamp;
124    }
125
126    /// Marks the file as seen (file system check).
127    pub fn mark_seen(&mut self, timestamp: u64) {
128        self.last_seen_at = timestamp;
129    }
130
131    /// Updates the mtime.
132    pub fn update_mtime(&mut self, mtime: u64) {
133        self.mtime = mtime;
134    }
135
136    /// Tombstones the file.
137    pub fn tombstone(&mut self, timestamp: u64) {
138        self.tombstoned = true;
139        self.tombstoned_at = Some(timestamp);
140    }
141
142    /// Checks if the file has been modified since last indexed.
143    ///
144    /// Returns `false` for files that have never been indexed
145    /// (`last_indexed_at == 0`). Use [`Self::last_indexed_at`] to distinguish
146    /// "never indexed" from "indexed and unmodified".
147    #[must_use]
148    pub fn is_modified(&self) -> bool {
149        self.last_indexed_at > 0 && self.mtime > self.last_indexed_at
150    }
151
152    /// Checks if the file is active (not tombstoned).
153    #[must_use]
154    pub fn is_active(&self) -> bool {
155        !self.tombstoned
156    }
157}
158
159/// Change tracking index: maps paths to their change state.
160#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
161pub struct ChangeIndex {
162    /// Maps path → change state.
163    pub states: BTreeMap<String, FileChangeState>,
164    /// Tombstoned files.
165    pub tombstones: BTreeMap<String, TombstoneEntry>,
166}
167
168impl ChangeIndex {
169    /// Registers a file as active.
170    pub fn register_active(&mut self, path: String, mtime: u64, timestamp: u64) {
171        let mut state = FileChangeState::active(path.clone(), mtime);
172        state.mark_indexed(timestamp);
173        state.mark_seen(timestamp);
174        self.states.insert(path, state);
175    }
176
177    /// Updates a file's mtime.
178    pub fn update_mtime(&mut self, path: &str, mtime: u64) {
179        if let Some(state) = self.states.get_mut(path) {
180            state.update_mtime(mtime);
181            state.mark_seen(mtime);
182        }
183    }
184
185    /// Marks a file as seen (file system check).
186    pub fn mark_seen(&mut self, path: &str, timestamp: u64) {
187        if let Some(state) = self.states.get_mut(path) {
188            state.mark_seen(timestamp);
189        }
190    }
191
192    /// Tombstones a deleted file.
193    pub fn tombstone(&mut self, path: &str, timestamp: u64) {
194        if let Some(state) = self.states.get_mut(path) {
195            state.tombstone(timestamp);
196            self.tombstones.insert(
197                path.to_string(),
198                TombstoneEntry {
199                    path: path.to_string(),
200                    deleted_at: timestamp,
201                    last_indexed_at: state.last_indexed_at,
202                },
203            );
204        }
205    }
206
207    /// Removes a file from the index (after tombstone cleanup).
208    pub fn remove(&mut self, path: &str) {
209        self.states.remove(path);
210        self.tombstones.remove(path);
211    }
212
213    /// Gets files that have changed since the given timestamp.
214    #[must_use]
215    pub fn get_changes_since(&self, since: u64) -> (Vec<String>, Vec<String>) {
216        let mut added = Vec::new();
217        let mut modified = Vec::new();
218
219        for (path, state) in &self.states {
220            if state.last_indexed_at < since && state.last_seen_at >= since {
221                if state.is_modified() {
222                    modified.push(path.clone());
223                } else {
224                    added.push(path.clone());
225                }
226            }
227        }
228
229        added.sort();
230        modified.sort();
231
232        (added, modified)
233    }
234
235    /// Gets tombstoned files.
236    #[must_use]
237    pub fn get_tombstones(&self) -> Vec<&TombstoneEntry> {
238        self.tombstones.values().collect()
239    }
240
241    /// Prunes tombstones older than the given age (in milliseconds).
242    pub fn prune_tombstones(&mut self, max_age_ms: u64, current_time: u64) -> Vec<String> {
243        let mut pruned = Vec::new();
244        self.tombstones.retain(|path, entry| {
245            if current_time - entry.deleted_at > max_age_ms {
246                pruned.push(path.clone());
247                false
248            } else {
249                true
250            }
251        });
252        pruned
253    }
254
255    /// Computes change feed for `--since` queries.
256    #[must_use]
257    pub fn compute_change_feed(&self, since: u64) -> ChangeFeed {
258        let mut added = Vec::new();
259        let mut modified = Vec::new();
260        let mut deleted = Vec::new();
261
262        for (path, state) in &self.states {
263            if state.last_seen_at >= since {
264                let entry = ChangeEntry {
265                    path: path.clone(),
266                    last_indexed_at: state.last_indexed_at,
267                    state: FileState::Active,
268                };
269                if state.is_modified() {
270                    modified.push(entry);
271                } else {
272                    added.push(entry);
273                }
274            }
275        }
276
277        for (path, entry) in &self.tombstones {
278            if entry.deleted_at >= since {
279                deleted.push(ChangeEntry {
280                    path: path.clone(),
281                    last_indexed_at: entry.last_indexed_at,
282                    state: FileState::Tombstoned,
283                });
284            }
285        }
286
287        added.sort_by_key(|e| e.path.clone());
288        modified.sort_by_key(|e| e.path.clone());
289        deleted.sort_by_key(|e| e.path.clone());
290
291        ChangeFeed {
292            added,
293            modified,
294            deleted,
295        }
296    }
297
298    /// Returns index metadata.
299    #[must_use]
300    pub fn to_metadata(&self) -> IndexMetadata {
301        IndexMetadata {
302            schema_version: 1,
303            last_indexed_at: self
304                .states
305                .values()
306                .map(|s| s.last_indexed_at)
307                .max()
308                .unwrap_or(0),
309            last_seen_at: self
310                .states
311                .values()
312                .map(|s| s.last_seen_at)
313                .max()
314                .unwrap_or(0),
315            active_notes: count_u32(self.states.values().filter(|s| s.is_active()).count()),
316            chunk_count: 0,
317            tombstone_count: count_u32(self.tombstones.len()),
318        }
319    }
320}
321
322#[cfg(test)]
323#[allow(clippy::unwrap_used, clippy::expect_used)]
324mod tests;