Skip to main content

libgrite_git/
snapshot.rs

1//! Snapshot management via `refs/grite/snapshots/<ts>`
2//!
3//! Snapshots provide an optimization for rebuilding state without
4//! replaying the entire WAL history.
5
6use git2::{Oid, Repository, Signature};
7use libgrite_core::types::event::Event;
8use serde::{Deserialize, Serialize};
9use std::path::Path;
10
11use crate::chunk::{chunk_hash, decode_chunk, encode_chunk};
12use crate::GitError;
13
/// Ref namespace that holds every snapshot; the snapshot's creation
/// timestamp is appended to this prefix to form the full ref name.
pub const SNAPSHOT_REF_PREFIX: &str = "refs/grite/snapshots/";

/// Upper bound on the number of events encoded into one snapshot chunk.
pub const SNAPSHOT_CHUNK_SIZE: usize = 1_000;
19
20/// Metadata stored in each snapshot commit
21#[derive(Debug, Serialize, Deserialize)]
22pub struct SnapshotMeta {
23    pub schema_version: u32,
24    pub created_ts: u64,
25    pub wal_head: String,
26    pub event_count: usize,
27    pub chunks: Vec<ChunkInfo>,
28}
29
30/// Information about a chunk in a snapshot
31#[derive(Debug, Serialize, Deserialize)]
32pub struct ChunkInfo {
33    pub path: String,
34    pub chunk_hash: String,
35    pub event_count: usize,
36}
37
38/// Reference to a snapshot
39#[derive(Debug, Clone)]
40pub struct SnapshotRef {
41    pub oid: Oid,
42    pub timestamp: u64,
43    pub ref_name: String,
44}
45
46/// Manager for snapshot operations
47pub struct SnapshotManager {
48    repo: Repository,
49}
50
51impl SnapshotManager {
52    /// Open a snapshot manager for the repository
53    pub fn open(git_dir: &Path) -> Result<Self, GitError> {
54        let repo_path = git_dir.parent().ok_or(GitError::NotARepo)?;
55        let repo = Repository::open(repo_path)?;
56        Ok(Self { repo })
57    }
58
59    /// Create a new snapshot from events
60    pub fn create(&self, wal_head: Oid, events: &[Event]) -> Result<Oid, GitError> {
61        if events.is_empty() {
62            return Err(GitError::Snapshot(
63                "Cannot create empty snapshot".to_string(),
64            ));
65        }
66
67        let now_ms = std::time::SystemTime::now()
68            .duration_since(std::time::UNIX_EPOCH)
69            .unwrap_or_default()
70            .as_millis() as u64;
71
72        // Split events into chunks
73        let mut chunks_info = Vec::new();
74        let mut tree_builder = self.repo.treebuilder(None)?;
75
76        // Create events directory
77        let mut events_builder = self.repo.treebuilder(None)?;
78
79        for (i, chunk_events) in events.chunks(SNAPSHOT_CHUNK_SIZE).enumerate() {
80            let chunk_data = encode_chunk(chunk_events)?;
81            let hash = chunk_hash(&chunk_data);
82            let hash_hex = hex::encode(hash);
83
84            let path = format!("{:04}.bin", i);
85            let blob_oid = self.repo.blob(&chunk_data)?;
86            events_builder.insert(&path, blob_oid, 0o100644)?;
87
88            chunks_info.push(ChunkInfo {
89                path: format!("events/{}", path),
90                chunk_hash: hash_hex,
91                event_count: chunk_events.len(),
92            });
93        }
94
95        let events_tree_oid = events_builder.write()?;
96        tree_builder.insert("events", events_tree_oid, 0o040000)?;
97
98        // Create snapshot.json
99        let meta = SnapshotMeta {
100            schema_version: 1,
101            created_ts: now_ms,
102            wal_head: wal_head.to_string(),
103            event_count: events.len(),
104            chunks: chunks_info,
105        };
106        let meta_json = serde_json::to_string_pretty(&meta)?;
107        let meta_blob = self.repo.blob(meta_json.as_bytes())?;
108        tree_builder.insert("snapshot.json", meta_blob, 0o100644)?;
109
110        let tree_oid = tree_builder.write()?;
111        let tree = self.repo.find_tree(tree_oid)?;
112
113        // Create commit
114        let sig = Signature::now("grite", "grit@local")?;
115        let message = format!("Snapshot: {} events at {}", events.len(), now_ms);
116
117        let ref_name = format!("{}{}", SNAPSHOT_REF_PREFIX, now_ms);
118        let commit_oid = self
119            .repo
120            .commit(Some(&ref_name), &sig, &sig, &message, &tree, &[])?;
121
122        Ok(commit_oid)
123    }
124
125    /// List all snapshots, ordered by timestamp (newest first)
126    pub fn list(&self) -> Result<Vec<SnapshotRef>, GitError> {
127        let mut snapshots = Vec::new();
128
129        for reference in self
130            .repo
131            .references_glob(&format!("{}*", SNAPSHOT_REF_PREFIX))?
132        {
133            let reference = reference?;
134            let ref_name = reference.name().unwrap_or("").to_string();
135
136            // Extract timestamp from ref name
137            let ts_str = ref_name.strip_prefix(SNAPSHOT_REF_PREFIX).unwrap_or("0");
138            let timestamp: u64 = ts_str.parse().unwrap_or(0);
139
140            if let Some(oid) = reference.target() {
141                snapshots.push(SnapshotRef {
142                    oid,
143                    timestamp,
144                    ref_name,
145                });
146            }
147        }
148
149        // Sort by timestamp descending (newest first)
150        snapshots.sort_by_key(|b| std::cmp::Reverse(b.timestamp));
151
152        Ok(snapshots)
153    }
154
155    /// Get the latest snapshot
156    pub fn latest(&self) -> Result<Option<SnapshotRef>, GitError> {
157        Ok(self.list()?.into_iter().next())
158    }
159
160    /// Read all events from a snapshot
161    pub fn read(&self, oid: Oid) -> Result<Vec<Event>, GitError> {
162        let commit = self.repo.find_commit(oid)?;
163        let tree = commit.tree()?;
164
165        // Read snapshot.json for chunk order
166        let meta_entry = tree
167            .get_name("snapshot.json")
168            .ok_or_else(|| GitError::Snapshot("Missing snapshot.json".to_string()))?;
169        let meta_blob = self.repo.find_blob(meta_entry.id())?;
170        let meta: SnapshotMeta = serde_json::from_slice(meta_blob.content())?;
171
172        // Read chunks in order
173        let mut all_events = Vec::with_capacity(meta.event_count);
174
175        let events_entry = tree
176            .get_name("events")
177            .ok_or_else(|| GitError::Snapshot("Missing events directory".to_string()))?;
178        let events_tree = self.repo.find_tree(events_entry.id())?;
179
180        for chunk_info in &meta.chunks {
181            let chunk_name = chunk_info
182                .path
183                .strip_prefix("events/")
184                .unwrap_or(&chunk_info.path);
185            let chunk_entry = events_tree
186                .get_name(chunk_name)
187                .ok_or_else(|| GitError::Snapshot(format!("Missing chunk: {}", chunk_name)))?;
188            let chunk_blob = self.repo.find_blob(chunk_entry.id())?;
189            let events = decode_chunk(chunk_blob.content())?;
190            all_events.extend(events);
191        }
192
193        Ok(all_events)
194    }
195
196    /// Check if a new snapshot should be created
197    pub fn should_create(&self, events_since_snapshot: usize, threshold: usize) -> bool {
198        events_since_snapshot >= threshold
199    }
200
201    /// Garbage collect old snapshots, keeping the N most recent
202    pub fn gc(&self, keep: usize) -> Result<GcStats, GitError> {
203        let snapshots = self.list()?;
204        let mut deleted = 0;
205
206        for snapshot in snapshots.into_iter().skip(keep) {
207            // Delete the reference
208            let mut reference = self.repo.find_reference(&snapshot.ref_name)?;
209            reference.delete()?;
210            deleted += 1;
211        }
212
213        Ok(GcStats {
214            deleted,
215            kept: keep,
216        })
217    }
218}
219
/// Statistics from a snapshot garbage-collection run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GcStats {
    /// Number of snapshot refs that were deleted.
    pub deleted: usize,
    /// Number of snapshots reported as retained by the run.
    pub kept: usize,
}
226
227#[cfg(test)]
228mod tests {
229    use super::*;
230    use libgrite_core::hash::compute_event_id;
231    use libgrite_core::types::event::EventKind;
232    use libgrite_core::types::ids::generate_issue_id;
233    use std::process::Command;
234    use tempfile::TempDir;
235
236    fn setup_test_repo() -> (TempDir, Repository) {
237        let temp = TempDir::new().unwrap();
238        Command::new("git")
239            .args(["init"])
240            .current_dir(temp.path())
241            .output()
242            .unwrap();
243        let repo = Repository::open(temp.path()).unwrap();
244        (temp, repo)
245    }
246
247    fn make_test_events(count: usize) -> Vec<Event> {
248        (0..count)
249            .map(|i| {
250                let issue_id = generate_issue_id();
251                let actor = [1u8; 16];
252                let ts_unix_ms = 1700000000000u64 + i as u64;
253                let kind = EventKind::IssueCreated {
254                    title: format!("Issue {}", i),
255                    body: "Body".to_string(),
256                    labels: vec![],
257                };
258                let event_id = compute_event_id(&issue_id, &actor, ts_unix_ms, None, &kind);
259                Event::new(event_id, issue_id, actor, ts_unix_ms, None, kind)
260            })
261            .collect()
262    }
263
264    #[test]
265    fn test_snapshot_create_and_read() {
266        let (temp, _repo) = setup_test_repo();
267        let git_dir = temp.path().join(".git");
268
269        let mgr = SnapshotManager::open(&git_dir).unwrap();
270        let events = make_test_events(5);
271
272        // Use a fake WAL head
273        let fake_wal_head = Oid::from_str("0000000000000000000000000000000000000000").unwrap();
274        let oid = mgr.create(fake_wal_head, &events).unwrap();
275
276        // Read back
277        let read_events = mgr.read(oid).unwrap();
278        assert_eq!(read_events.len(), 5);
279        for (orig, read) in events.iter().zip(read_events.iter()) {
280            assert_eq!(orig.event_id, read.event_id);
281        }
282    }
283
284    #[test]
285    fn test_snapshot_list_and_latest() {
286        let (temp, _repo) = setup_test_repo();
287        let git_dir = temp.path().join(".git");
288
289        let mgr = SnapshotManager::open(&git_dir).unwrap();
290
291        // No snapshots initially
292        assert!(mgr.list().unwrap().is_empty());
293        assert!(mgr.latest().unwrap().is_none());
294
295        // Create snapshots
296        let fake_wal = Oid::from_str("0000000000000000000000000000000000000000").unwrap();
297        mgr.create(fake_wal, &make_test_events(1)).unwrap();
298        std::thread::sleep(std::time::Duration::from_millis(10));
299        mgr.create(fake_wal, &make_test_events(2)).unwrap();
300
301        let snapshots = mgr.list().unwrap();
302        assert_eq!(snapshots.len(), 2);
303
304        // Latest should be the second one (more recent)
305        let latest = mgr.latest().unwrap().unwrap();
306        assert_eq!(latest.oid, snapshots[0].oid);
307    }
308
309    #[test]
310    fn test_snapshot_gc() {
311        let (temp, _repo) = setup_test_repo();
312        let git_dir = temp.path().join(".git");
313
314        let mgr = SnapshotManager::open(&git_dir).unwrap();
315        let fake_wal = Oid::from_str("0000000000000000000000000000000000000000").unwrap();
316
317        // Create 5 snapshots
318        for _ in 0..5 {
319            mgr.create(fake_wal, &make_test_events(1)).unwrap();
320            std::thread::sleep(std::time::Duration::from_millis(10));
321        }
322
323        assert_eq!(mgr.list().unwrap().len(), 5);
324
325        // GC keeping only 2
326        let stats = mgr.gc(2).unwrap();
327        assert_eq!(stats.deleted, 3);
328        assert_eq!(stats.kept, 2);
329
330        assert_eq!(mgr.list().unwrap().len(), 2);
331    }
332}