Skip to main content

libgrite_git/
snapshot.rs

1//! Snapshot management via `refs/grite/snapshots/<ts>`
2//!
3//! Snapshots provide an optimization for rebuilding state without
4//! replaying the entire WAL history.
5
6use std::path::Path;
7use git2::{Oid, Repository, Signature};
8use serde::{Deserialize, Serialize};
9use libgrite_core::types::event::Event;
10
11use crate::chunk::{encode_chunk, decode_chunk, chunk_hash};
12use crate::GitError;
13
/// Snapshot reference prefix.
///
/// `SnapshotManager::create` names each snapshot ref by appending the
/// creation time in milliseconds to this prefix.
pub const SNAPSHOT_REF_PREFIX: &str = "refs/grite/snapshots/";

/// Maximum events per chunk in a snapshot.
///
/// `SnapshotManager::create` splits the event slice into runs of at most this
/// many events and stores each run as one blob.
pub const SNAPSHOT_CHUNK_SIZE: usize = 1000;
19
/// Metadata stored in each snapshot commit.
///
/// Serialized as pretty-printed JSON into the `snapshot.json` blob at the root
/// of the snapshot tree.
#[derive(Debug, Serialize, Deserialize)]
pub struct SnapshotMeta {
    /// Version of this metadata layout; `SnapshotManager::create` writes `1`.
    pub schema_version: u32,
    /// Creation time in milliseconds since the Unix epoch.
    pub created_ts: u64,
    /// String form of the `wal_head` `Oid` passed to `SnapshotManager::create`.
    pub wal_head: String,
    /// Total number of events passed to `SnapshotManager::create`.
    pub event_count: usize,
    /// Chunk descriptors, in the order `SnapshotManager::read` decodes them.
    pub chunks: Vec<ChunkInfo>,
}
29
/// Information about a chunk in a snapshot
#[derive(Debug, Serialize, Deserialize)]
pub struct ChunkInfo {
    /// Path of the chunk blob relative to the snapshot tree root,
    /// written as `events/NNNN.bin`.
    pub path: String,
    /// Hex encoding of `chunk_hash` over the encoded chunk bytes.
    pub chunk_hash: String,
    /// Number of events encoded in this chunk.
    pub event_count: usize,
}
37
/// Reference to a snapshot
#[derive(Debug, Clone)]
pub struct SnapshotRef {
    /// Object id the snapshot ref points at.
    pub oid: Oid,
    /// Timestamp parsed from the ref name suffix; `0` when the suffix is not
    /// a valid `u64`.
    pub timestamp: u64,
    /// Full reference name, including the `refs/grite/snapshots/` prefix.
    pub ref_name: String,
}
45
/// Manager for snapshot operations
///
/// Owns its own `git2::Repository` handle, opened by [`SnapshotManager::open`].
pub struct SnapshotManager {
    /// Repository that holds the snapshot refs and objects.
    repo: Repository,
}
50
51impl SnapshotManager {
52    /// Open a snapshot manager for the repository
53    pub fn open(git_dir: &Path) -> Result<Self, GitError> {
54        let repo_path = git_dir.parent().ok_or(GitError::NotARepo)?;
55        let repo = Repository::open(repo_path)?;
56        Ok(Self { repo })
57    }
58
59    /// Create a new snapshot from events
60    pub fn create(&self, wal_head: Oid, events: &[Event]) -> Result<Oid, GitError> {
61        if events.is_empty() {
62            return Err(GitError::Snapshot("Cannot create empty snapshot".to_string()));
63        }
64
65        let now_ms = std::time::SystemTime::now()
66            .duration_since(std::time::UNIX_EPOCH)
67            .unwrap()
68            .as_millis() as u64;
69
70        // Split events into chunks
71        let mut chunks_info = Vec::new();
72        let mut tree_builder = self.repo.treebuilder(None)?;
73
74        // Create events directory
75        let mut events_builder = self.repo.treebuilder(None)?;
76
77        for (i, chunk_events) in events.chunks(SNAPSHOT_CHUNK_SIZE).enumerate() {
78            let chunk_data = encode_chunk(chunk_events)?;
79            let hash = chunk_hash(&chunk_data);
80            let hash_hex = hex::encode(hash);
81
82            let path = format!("{:04}.bin", i);
83            let blob_oid = self.repo.blob(&chunk_data)?;
84            events_builder.insert(&path, blob_oid, 0o100644)?;
85
86            chunks_info.push(ChunkInfo {
87                path: format!("events/{}", path),
88                chunk_hash: hash_hex,
89                event_count: chunk_events.len(),
90            });
91        }
92
93        let events_tree_oid = events_builder.write()?;
94        tree_builder.insert("events", events_tree_oid, 0o040000)?;
95
96        // Create snapshot.json
97        let meta = SnapshotMeta {
98            schema_version: 1,
99            created_ts: now_ms,
100            wal_head: wal_head.to_string(),
101            event_count: events.len(),
102            chunks: chunks_info,
103        };
104        let meta_json = serde_json::to_string_pretty(&meta)?;
105        let meta_blob = self.repo.blob(meta_json.as_bytes())?;
106        tree_builder.insert("snapshot.json", meta_blob, 0o100644)?;
107
108        let tree_oid = tree_builder.write()?;
109        let tree = self.repo.find_tree(tree_oid)?;
110
111        // Create commit
112        let sig = Signature::now("grite", "grit@local")?;
113        let message = format!("Snapshot: {} events at {}", events.len(), now_ms);
114
115        let ref_name = format!("{}{}", SNAPSHOT_REF_PREFIX, now_ms);
116        let commit_oid = self.repo.commit(
117            Some(&ref_name),
118            &sig,
119            &sig,
120            &message,
121            &tree,
122            &[],
123        )?;
124
125        Ok(commit_oid)
126    }
127
128    /// List all snapshots, ordered by timestamp (newest first)
129    pub fn list(&self) -> Result<Vec<SnapshotRef>, GitError> {
130        let mut snapshots = Vec::new();
131
132        for reference in self.repo.references_glob(&format!("{}*", SNAPSHOT_REF_PREFIX))? {
133            let reference = reference?;
134            let ref_name = reference.name().unwrap_or("").to_string();
135
136            // Extract timestamp from ref name
137            let ts_str = ref_name.strip_prefix(SNAPSHOT_REF_PREFIX).unwrap_or("0");
138            let timestamp: u64 = ts_str.parse().unwrap_or(0);
139
140            if let Some(oid) = reference.target() {
141                snapshots.push(SnapshotRef {
142                    oid,
143                    timestamp,
144                    ref_name,
145                });
146            }
147        }
148
149        // Sort by timestamp descending (newest first)
150        snapshots.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
151
152        Ok(snapshots)
153    }
154
155    /// Get the latest snapshot
156    pub fn latest(&self) -> Result<Option<SnapshotRef>, GitError> {
157        Ok(self.list()?.into_iter().next())
158    }
159
160    /// Read all events from a snapshot
161    pub fn read(&self, oid: Oid) -> Result<Vec<Event>, GitError> {
162        let commit = self.repo.find_commit(oid)?;
163        let tree = commit.tree()?;
164
165        // Read snapshot.json for chunk order
166        let meta_entry = tree.get_name("snapshot.json")
167            .ok_or_else(|| GitError::Snapshot("Missing snapshot.json".to_string()))?;
168        let meta_blob = self.repo.find_blob(meta_entry.id())?;
169        let meta: SnapshotMeta = serde_json::from_slice(meta_blob.content())?;
170
171        // Read chunks in order
172        let mut all_events = Vec::with_capacity(meta.event_count);
173
174        let events_entry = tree.get_name("events")
175            .ok_or_else(|| GitError::Snapshot("Missing events directory".to_string()))?;
176        let events_tree = self.repo.find_tree(events_entry.id())?;
177
178        for chunk_info in &meta.chunks {
179            let chunk_name = chunk_info.path.strip_prefix("events/").unwrap_or(&chunk_info.path);
180            let chunk_entry = events_tree.get_name(chunk_name)
181                .ok_or_else(|| GitError::Snapshot(format!("Missing chunk: {}", chunk_name)))?;
182            let chunk_blob = self.repo.find_blob(chunk_entry.id())?;
183            let events = decode_chunk(chunk_blob.content())?;
184            all_events.extend(events);
185        }
186
187        Ok(all_events)
188    }
189
190    /// Check if a new snapshot should be created
191    pub fn should_create(&self, events_since_snapshot: usize, threshold: usize) -> bool {
192        events_since_snapshot >= threshold
193    }
194
195    /// Garbage collect old snapshots, keeping the N most recent
196    pub fn gc(&self, keep: usize) -> Result<GcStats, GitError> {
197        let snapshots = self.list()?;
198        let mut deleted = 0;
199
200        for snapshot in snapshots.into_iter().skip(keep) {
201            // Delete the reference
202            let mut reference = self.repo.find_reference(&snapshot.ref_name)?;
203            reference.delete()?;
204            deleted += 1;
205        }
206
207        Ok(GcStats { deleted, kept: keep })
208    }
209}
210
/// Statistics from garbage collection
///
/// Returned by `SnapshotManager::gc`.
#[derive(Debug)]
pub struct GcStats {
    /// Number of snapshot references deleted by the run.
    pub deleted: usize,
    /// Number of snapshots retained, as reported by `SnapshotManager::gc`.
    pub kept: usize,
}
217
218#[cfg(test)]
219mod tests {
220    use super::*;
221    use libgrite_core::hash::compute_event_id;
222    use libgrite_core::types::event::EventKind;
223    use libgrite_core::types::ids::generate_issue_id;
224    use tempfile::TempDir;
225    use std::process::Command;
226
227    fn setup_test_repo() -> (TempDir, Repository) {
228        let temp = TempDir::new().unwrap();
229        Command::new("git")
230            .args(["init"])
231            .current_dir(temp.path())
232            .output()
233            .unwrap();
234        let repo = Repository::open(temp.path()).unwrap();
235        (temp, repo)
236    }
237
238    fn make_test_events(count: usize) -> Vec<Event> {
239        (0..count).map(|i| {
240            let issue_id = generate_issue_id();
241            let actor = [1u8; 16];
242            let ts_unix_ms = 1700000000000u64 + i as u64;
243            let kind = EventKind::IssueCreated {
244                title: format!("Issue {}", i),
245                body: "Body".to_string(),
246                labels: vec![],
247            };
248            let event_id = compute_event_id(&issue_id, &actor, ts_unix_ms, None, &kind);
249            Event::new(event_id, issue_id, actor, ts_unix_ms, None, kind)
250        }).collect()
251    }
252
253    #[test]
254    fn test_snapshot_create_and_read() {
255        let (temp, _repo) = setup_test_repo();
256        let git_dir = temp.path().join(".git");
257
258        let mgr = SnapshotManager::open(&git_dir).unwrap();
259        let events = make_test_events(5);
260
261        // Use a fake WAL head
262        let fake_wal_head = Oid::from_str("0000000000000000000000000000000000000000").unwrap();
263        let oid = mgr.create(fake_wal_head, &events).unwrap();
264
265        // Read back
266        let read_events = mgr.read(oid).unwrap();
267        assert_eq!(read_events.len(), 5);
268        for (orig, read) in events.iter().zip(read_events.iter()) {
269            assert_eq!(orig.event_id, read.event_id);
270        }
271    }
272
273    #[test]
274    fn test_snapshot_list_and_latest() {
275        let (temp, _repo) = setup_test_repo();
276        let git_dir = temp.path().join(".git");
277
278        let mgr = SnapshotManager::open(&git_dir).unwrap();
279
280        // No snapshots initially
281        assert!(mgr.list().unwrap().is_empty());
282        assert!(mgr.latest().unwrap().is_none());
283
284        // Create snapshots
285        let fake_wal = Oid::from_str("0000000000000000000000000000000000000000").unwrap();
286        mgr.create(fake_wal, &make_test_events(1)).unwrap();
287        std::thread::sleep(std::time::Duration::from_millis(10));
288        mgr.create(fake_wal, &make_test_events(2)).unwrap();
289
290        let snapshots = mgr.list().unwrap();
291        assert_eq!(snapshots.len(), 2);
292
293        // Latest should be the second one (more recent)
294        let latest = mgr.latest().unwrap().unwrap();
295        assert_eq!(latest.oid, snapshots[0].oid);
296    }
297
298    #[test]
299    fn test_snapshot_gc() {
300        let (temp, _repo) = setup_test_repo();
301        let git_dir = temp.path().join(".git");
302
303        let mgr = SnapshotManager::open(&git_dir).unwrap();
304        let fake_wal = Oid::from_str("0000000000000000000000000000000000000000").unwrap();
305
306        // Create 5 snapshots
307        for _ in 0..5 {
308            mgr.create(fake_wal, &make_test_events(1)).unwrap();
309            std::thread::sleep(std::time::Duration::from_millis(10));
310        }
311
312        assert_eq!(mgr.list().unwrap().len(), 5);
313
314        // GC keeping only 2
315        let stats = mgr.gc(2).unwrap();
316        assert_eq!(stats.deleted, 3);
317        assert_eq!(stats.kept, 2);
318
319        assert_eq!(mgr.list().unwrap().len(), 2);
320    }
321}