Skip to main content

lore_cli/sync/
store.rs

1//! Consolidated session-blob pipeline for git-ref sync.
2//!
3//! A [`SessionRecord`] holds the complete reasoning record for a single
4//! session: the session row itself plus its messages, commit links, tags,
5//! annotations, and optional summary. Encrypting the full record means a
6//! teammate who pulls the repo can run `lore blame` and recover the
7//! commit-to-reasoning linkage.
8//!
9//! The on-disk blob pipeline is `serde_json -> gzip -> encrypt`. Compression
10//! happens before encryption because ciphertext does not compress. The output
11//! is raw bytes suitable for writing directly as a git blob (no base64).
12
13use std::io::{Read, Write};
14
15use flate2::read::GzDecoder;
16use flate2::write::GzEncoder;
17use flate2::Compression;
18use serde::{Deserialize, Serialize};
19
20use super::encryption::{decrypt_data, encrypt_data};
21use super::SyncError;
22use crate::storage::models::{Annotation, Message, Session, SessionLink, Summary, Tag, Tombstone};
23
24/// The complete reasoning record for a single session.
25///
26/// This is the unit that gets serialized, compressed, encrypted, and written as
27/// a single git blob (`sessions/<uuid>.enc`). It is reconstructed verbatim on
28/// the receiving machine so the full reasoning history, including commit links,
29/// rides along with the code.
30#[derive(Clone, Serialize, Deserialize)]
31pub struct SessionRecord {
32    /// The session metadata row.
33    pub session: Session,
34
35    /// All messages belonging to the session, in conversation order.
36    pub messages: Vec<Message>,
37
38    /// Links from the session to git commits, branches, or pull requests.
39    pub links: Vec<SessionLink>,
40
41    /// User-applied tags categorizing the session.
42    pub tags: Vec<Tag>,
43
44    /// User-created annotations attached to the session.
45    pub annotations: Vec<Annotation>,
46
47    /// The session summary, if one has been generated.
48    pub summary: Option<Summary>,
49}
50
51/// Manual `Debug` that prints only non-sensitive metadata.
52///
53/// The derived `Debug` would print plaintext message content, annotation text,
54/// and summary text. Because a `SessionRecord` holds the full decrypted
55/// reasoning history, a future failure log that formatted one could leak private
56/// reasoning. This impl emits only counts and identifiers, never user content.
57impl std::fmt::Debug for SessionRecord {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        f.debug_struct("SessionRecord")
60            .field("session_id", &self.session.id)
61            .field("tool", &self.session.tool)
62            .field("message_count", &self.messages.len())
63            .field("link_count", &self.links.len())
64            .field("tag_count", &self.tags.len())
65            .field("annotation_count", &self.annotations.len())
66            .field("has_summary", &self.summary.is_some())
67            .finish()
68    }
69}
70
71/// Serializes and encrypts a session record into a git-blob-ready byte buffer.
72///
73/// The pipeline is `serde_json -> gzip -> encrypt_data`. The returned bytes are
74/// the raw blob contents to hand to `git hash-object`.
75///
76/// # Arguments
77///
78/// * `record` - The full reasoning record to encode
79/// * `key` - The 32-byte encryption key derived from the store passphrase
80pub fn encrypt_session_record(record: &SessionRecord, key: &[u8]) -> Result<Vec<u8>, SyncError> {
81    let json = serde_json::to_vec(record)
82        .map_err(|e| SyncError::Serialization(format!("Failed to serialize record: {e}")))?;
83
84    let compressed = gzip_compress(&json)?;
85
86    encrypt_data(&compressed, key)
87}
88
89/// Decrypts and deserializes a session record from git-blob bytes.
90///
91/// Inverse of [`encrypt_session_record`]: `decrypt_data -> gunzip ->
92/// serde_json`.
93///
94/// # Arguments
95///
96/// * `blob` - The raw blob bytes as read from `git cat-file blob`
97/// * `key` - The 32-byte encryption key derived from the store passphrase
98pub fn decrypt_session_record(blob: &[u8], key: &[u8]) -> Result<SessionRecord, SyncError> {
99    let compressed = decrypt_data(blob, key)?;
100
101    let json = gzip_decompress(&compressed)?;
102
103    serde_json::from_slice(&json)
104        .map_err(|e| SyncError::Serialization(format!("Failed to deserialize record: {e}")))
105}
106
107/// Serializes and encrypts the tombstone set into a git-blob-ready buffer.
108///
109/// Stored at `meta/tombstones` in the store tree so deletions of child records
110/// propagate across machines alongside the session blobs. Uses the identical
111/// `serde_json -> gzip -> encrypt_data` pipeline as [`encrypt_session_record`].
112pub fn encrypt_tombstones(tombstones: &[Tombstone], key: &[u8]) -> Result<Vec<u8>, SyncError> {
113    let json = serde_json::to_vec(tombstones)
114        .map_err(|e| SyncError::Serialization(format!("Failed to serialize tombstones: {e}")))?;
115
116    let compressed = gzip_compress(&json)?;
117
118    encrypt_data(&compressed, key)
119}
120
121/// Decrypts and deserializes the tombstone set from git-blob bytes.
122///
123/// Inverse of [`encrypt_tombstones`].
124pub fn decrypt_tombstones(blob: &[u8], key: &[u8]) -> Result<Vec<Tombstone>, SyncError> {
125    let compressed = decrypt_data(blob, key)?;
126
127    let json = gzip_decompress(&compressed)?;
128
129    serde_json::from_slice(&json)
130        .map_err(|e| SyncError::Serialization(format!("Failed to deserialize tombstones: {e}")))
131}
132
133/// Compresses bytes with gzip at the default compression level.
134fn gzip_compress(data: &[u8]) -> Result<Vec<u8>, SyncError> {
135    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
136    encoder
137        .write_all(data)
138        .map_err(|e| SyncError::Compression(format!("Gzip write failed: {e}")))?;
139    encoder
140        .finish()
141        .map_err(|e| SyncError::Compression(format!("Gzip finish failed: {e}")))
142}
143
144/// Decompresses gzip bytes produced by [`gzip_compress`].
145fn gzip_decompress(data: &[u8]) -> Result<Vec<u8>, SyncError> {
146    let mut decoder = GzDecoder::new(data);
147    let mut out = Vec::new();
148    decoder
149        .read_to_end(&mut out)
150        .map_err(|e| SyncError::Compression(format!("Gzip read failed: {e}")))?;
151    Ok(out)
152}
153
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::models::{LinkCreator, LinkType, MessageContent, MessageRole};
    use crate::sync::encryption::{derive_key, generate_salt};
    use chrono::Utc;
    use uuid::Uuid;

    /// Builds a fully populated record: two messages, one commit link, one tag,
    /// one annotation, and a summary, all attached to a fresh session id.
    fn sample_record() -> SessionRecord {
        let session_id = Uuid::new_v4();

        let session = Session {
            id: session_id,
            tool: String::from("claude-code"),
            tool_version: Some(String::from("2.0.0")),
            started_at: Utc::now(),
            ended_at: Some(Utc::now()),
            model: Some(String::from("claude-opus")),
            working_directory: String::from("/home/user/project"),
            git_branch: Some(String::from("main")),
            source_path: Some(String::from("/sessions/a.jsonl")),
            message_count: 2,
            machine_id: Some(String::from("machine-1")),
        };

        // The two messages differ only in position, speaker, text, and model.
        let message = |index, role, text: &str, model: Option<&str>| Message {
            id: Uuid::new_v4(),
            session_id,
            parent_id: None,
            index,
            timestamp: Utc::now(),
            role,
            content: MessageContent::Text(String::from(text)),
            model: model.map(String::from),
            git_branch: Some(String::from("main")),
            cwd: Some(String::from("/home/user/project")),
        };

        let messages = vec![
            message(0, MessageRole::User, "Fix the bug", None),
            message(1, MessageRole::Assistant, "Done.", Some("claude-opus")),
        ];

        SessionRecord {
            session,
            messages,
            links: vec![SessionLink {
                id: Uuid::new_v4(),
                session_id,
                link_type: LinkType::Commit,
                commit_sha: Some(String::from("abc123")),
                branch: Some(String::from("main")),
                remote: Some(String::from("origin")),
                created_at: Utc::now(),
                created_by: LinkCreator::User,
                confidence: Some(0.95),
            }],
            tags: vec![Tag {
                id: Uuid::new_v4(),
                session_id,
                label: String::from("bug-fix"),
                created_at: Utc::now(),
            }],
            annotations: vec![Annotation {
                id: Uuid::new_v4(),
                session_id,
                content: String::from("Important fix"),
                created_at: Utc::now(),
            }],
            summary: Some(Summary {
                id: Uuid::new_v4(),
                session_id,
                content: String::from("Fixed a bug in the parser"),
                generated_at: Utc::now(),
            }),
        }
    }

    #[test]
    fn test_gzip_roundtrip() {
        let original = b"the quick brown fox jumps over the lazy dog".repeat(100);

        let packed = gzip_compress(&original).unwrap();
        let unpacked = gzip_decompress(&packed).unwrap();

        assert_eq!(unpacked, original);
    }

    #[test]
    fn test_gzip_compresses_repetitive_data() {
        // A long run of one byte must come out smaller than it went in.
        let input = vec![b'a'; 10_000];
        let packed = gzip_compress(&input).unwrap();
        assert!(packed.len() < input.len());
    }

    #[test]
    fn test_encrypt_decrypt_record_roundtrip() {
        let salt = generate_salt();
        let key = derive_key("test passphrase", &salt).unwrap();

        let original = sample_record();
        let ciphertext = encrypt_session_record(&original, &key).unwrap();
        let restored = decrypt_session_record(&ciphertext, &key).unwrap();

        assert_eq!(restored.session.id, original.session.id);
        assert_eq!(restored.messages.len(), original.messages.len());
        assert_eq!(restored.messages[0].content.text(), "Fix the bug");
        assert_eq!(restored.links.len(), 1);
        assert_eq!(restored.links[0].commit_sha, Some("abc123".to_string()));
        assert_eq!(restored.tags[0].label, "bug-fix");
        assert_eq!(restored.annotations[0].content, "Important fix");

        let summary = restored.summary.unwrap();
        assert_eq!(summary.content, "Fixed a bug in the parser");
    }

    #[test]
    fn test_full_record_serialization_preserves_all_fields() {
        // Skip the crypto layer entirely: a plain JSON round trip shows that
        // the record type itself carries every part of the reasoning record.
        let original = sample_record();
        let encoded = serde_json::to_vec(&original).unwrap();
        let decoded: SessionRecord = serde_json::from_slice(&encoded).unwrap();

        assert_eq!(decoded.session.tool, "claude-code");
        assert_eq!(decoded.messages.len(), 2);
        assert_eq!(decoded.links.len(), 1);
        assert_eq!(decoded.tags.len(), 1);
        assert_eq!(decoded.annotations.len(), 1);
        assert!(decoded.summary.is_some());
    }

    #[test]
    fn test_decrypt_record_wrong_key_fails() {
        // Same salt, different passphrases: the keys must not be interchangeable.
        let salt = generate_salt();
        let right_key = derive_key("passphrase1", &salt).unwrap();
        let wrong_key = derive_key("passphrase2", &salt).unwrap();

        let ciphertext = encrypt_session_record(&sample_record(), &right_key).unwrap();

        assert!(decrypt_session_record(&ciphertext, &wrong_key).is_err());
    }

    #[test]
    fn test_debug_does_not_leak_plaintext() {
        // Message content, annotation text, and summary text are private
        // reasoning; the manual Debug impl must keep them out of its output.
        let rendered = format!("{:?}", sample_record());

        for secret in ["Fix the bug", "Important fix", "Fixed a bug in the parser"] {
            assert!(!rendered.contains(secret));
        }

        // Harmless metadata must still be present for diagnostics.
        for expected in ["SessionRecord", "claude-code", "message_count", "has_summary"] {
            assert!(rendered.contains(expected));
        }
    }

    /// Creates a git repository in `repo` for tests that cross the git blob
    /// boundary.
    ///
    /// No commits are made, so signing does not matter; an identity is still
    /// configured for consistency with the gitref test helpers.
    fn init_test_repo(repo: &std::path::Path) {
        let setup: [&[&str]; 3] = [
            &["init", "-q"],
            &["config", "user.name", "Lore Test"],
            &["config", "user.email", "test@example.com"],
        ];

        for args in setup {
            let status = std::process::Command::new("git")
                .current_dir(repo)
                .args(args)
                .status()
                .expect("failed to spawn git");
            assert!(status.success(), "git {args:?} failed");
        }
    }

    #[test]
    fn test_cross_module_round_trip_through_git_blob() {
        use crate::sync::gitref;

        let workdir = tempfile::tempdir().unwrap();
        let repo = workdir.path();
        init_test_repo(repo);

        let salt = generate_salt();
        let key = derive_key("cross module passphrase", &salt).unwrap();

        let original = sample_record();
        let ciphertext = encrypt_session_record(&original, &key).unwrap();

        // Go through the real git object store: hash-object, then cat-file.
        let sha = gitref::write_blob(repo, &ciphertext).unwrap();
        let fetched = gitref::read_blob(repo, &sha).unwrap();
        assert_eq!(fetched, ciphertext, "git blob round-trip must be byte-exact");

        let restored = decrypt_session_record(&fetched, &key).unwrap();
        assert_eq!(restored.session.id, original.session.id);
        assert_eq!(restored.messages.len(), original.messages.len());
        assert_eq!(restored.messages[0].content.text(), "Fix the bug");
        assert_eq!(restored.links[0].commit_sha, Some("abc123".to_string()));

        let summary = restored.summary.unwrap();
        assert_eq!(summary.content, "Fixed a bug in the parser");
    }

    #[test]
    fn test_binary_blob_round_trip_through_git() {
        use crate::sync::gitref;

        let workdir = tempfile::tempdir().unwrap();
        let repo = workdir.path();
        init_test_repo(repo);

        // Leading/trailing NUL, embedded newlines, and high-bit bytes: any
        // trimming or UTF-8 decoding of cat-file output would corrupt these.
        let payload: Vec<u8> = vec![
            0x00, 0x0a, 0x0d, 0xff, 0x80, b'a', 0x00, 0x81, 0xfe, 0x0a, 0x20, 0x00,
        ];

        let sha = gitref::write_blob(repo, &payload).unwrap();
        let fetched = gitref::read_blob(repo, &sha).unwrap();
        assert_eq!(fetched, payload, "binary blob must survive byte-for-byte");
    }

    #[test]
    fn test_encrypt_decrypt_tombstones_roundtrip() {
        let salt = generate_salt();
        let key = derive_key("tombstone passphrase", &salt).unwrap();

        // One tombstone with a parent session and one without.
        let with_session = Tombstone {
            child_id: Uuid::new_v4().to_string(),
            kind: String::from("link"),
            session_id: Some(Uuid::new_v4().to_string()),
            deleted_at: Utc::now(),
        };
        let without_session = Tombstone {
            child_id: Uuid::new_v4().to_string(),
            kind: String::from("summary"),
            session_id: None,
            deleted_at: Utc::now(),
        };
        let original = vec![with_session, without_session];

        let ciphertext = encrypt_tombstones(&original, &key).unwrap();
        let restored = decrypt_tombstones(&ciphertext, &key).unwrap();

        assert_eq!(restored, original);
    }

    #[test]
    fn test_decrypt_tombstones_wrong_key_fails() {
        let salt = generate_salt();
        let right_key = derive_key("right", &salt).unwrap();
        let wrong_key = derive_key("wrong", &salt).unwrap();

        let original = vec![Tombstone {
            child_id: Uuid::new_v4().to_string(),
            kind: String::from("tag"),
            session_id: None,
            deleted_at: Utc::now(),
        }];
        let ciphertext = encrypt_tombstones(&original, &right_key).unwrap();

        let outcome = decrypt_tombstones(&ciphertext, &wrong_key);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_record_with_no_summary() {
        let salt = generate_salt();
        let key = derive_key("passphrase", &salt).unwrap();

        // Strip every optional part so only the session and messages remain.
        let mut sparse = sample_record();
        sparse.summary = None;
        sparse.links.clear();
        sparse.tags.clear();
        sparse.annotations.clear();

        let ciphertext = encrypt_session_record(&sparse, &key).unwrap();
        let restored = decrypt_session_record(&ciphertext, &key).unwrap();

        assert!(restored.summary.is_none());
        assert!(restored.links.is_empty());
        assert!(restored.tags.is_empty());
        assert!(restored.annotations.is_empty());
    }
}