Skip to main content

cersei_memory/
session_storage.rs

//! Session storage: append-only JSONL transcript persistence.
//!
//! Each session is a `.jsonl` file with one entry per line.
//!
//! When appending an entry would push the current session file past
//! `MAX_SESSION_SIZE` (50MB), the write automatically forks to a new part
//! file (`<session>_part2.jsonl`, `<session>_part3.jsonl`, etc.).
//! Loading stitches all parts together transparently.
8
9use cersei_types::*;
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12use std::path::{Path, PathBuf};
13
14// ─── Constants ───────────────────────────────────────────────────────────────
15
/// Upper bound, in bytes, for a single part file (50MB). Writes that would
/// push the current part past this value go to a new part instead.
const MAX_SESSION_SIZE: u64 = 50_000_000;

/// Upper bound, in bytes, for all parts of one session combined (200MB).
/// Loading a session whose parts add up to more than this is rejected.
const MAX_TOTAL_SESSION_SIZE: u64 = 200_000_000;
18
19// ─── Types ───────────────────────────────────────────────────────────────────
20
21/// A single entry in the session transcript.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23#[serde(tag = "type", rename_all = "lowercase")]
24pub enum TranscriptEntry {
25    User(TranscriptMessage),
26    Assistant(TranscriptMessage),
27    System(TranscriptMessage),
28    Summary(SummaryEntry),
29    Tombstone(TombstoneEntry),
30    #[serde(other)]
31    Unknown,
32}
33
34/// A conversation message entry.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct TranscriptMessage {
37    pub uuid: String,
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub parent_uuid: Option<String>,
40    pub timestamp: String,
41    pub session_id: String,
42    #[serde(default)]
43    pub cwd: String,
44    pub message: Message,
45    #[serde(default)]
46    pub is_sidechain: bool,
47    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
48    pub extra: HashMap<String, serde_json::Value>,
49}
50
51/// A compaction summary entry.
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct SummaryEntry {
54    pub uuid: String,
55    pub timestamp: String,
56    pub session_id: String,
57    pub summary: String,
58    pub messages_compacted: usize,
59}
60
61/// A soft-delete marker.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct TombstoneEntry {
64    pub deleted_uuid: String,
65    pub timestamp: String,
66}
67
68// ─── Path resolution ─────────────────────────────────────────────────────────
69
70/// Compute the base transcript file path for a session.
71pub fn transcript_path(project_root: &Path, session_id: &str) -> PathBuf {
72    let sanitized = super::memdir::sanitize_path_component(&project_root.display().to_string());
73    let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
74    home.join(".claude")
75        .join("projects")
76        .join(sanitized)
77        .join(format!("{}.jsonl", session_id))
78}
79
80// ─── Multi-part helpers ─────────────────────────────────────────────────────
81
/// Locate the part file that appends should currently target.
///
/// Probes `<stem>_part2.jsonl`, `<stem>_part3.jsonl`, ... next to `base_path`
/// and returns the last one of that unbroken run that exists. When no
/// `_part2` exists (or the file stem is missing / not valid UTF-8) the base
/// path itself is returned, whether or not it exists yet.
///
/// No size check happens here; deciding whether the returned part is full is
/// left to the caller.
fn current_write_path(base_path: &Path) -> PathBuf {
    let stem = match base_path.file_stem().and_then(|s| s.to_str()) {
        Some(stem) => stem,
        None => return base_path.to_path_buf(),
    };
    let dir = base_path.parent().unwrap_or(Path::new("."));

    // Walk the numbered parts until the first gap and keep the last hit.
    (2..)
        .map(|index| dir.join(format!("{}_part{}.jsonl", stem, index)))
        .take_while(|candidate| candidate.exists())
        .last()
        .unwrap_or_else(|| base_path.to_path_buf())
}
107
/// Return the first `<stem>_partN.jsonl` path (N starting at 2) next to
/// `base_path` that does not exist yet.
///
/// If `base_path` has no file stem, or the stem is not valid UTF-8, the base
/// path is returned unchanged.
fn next_part_path(base_path: &Path) -> PathBuf {
    let (stem, dir) = match base_path.file_stem().and_then(|s| s.to_str()) {
        Some(stem) => (stem, base_path.parent().unwrap_or(Path::new("."))),
        None => return base_path.to_path_buf(),
    };

    let mut index = 2;
    let mut candidate = dir.join(format!("{}_part{}.jsonl", stem, index));
    while candidate.exists() {
        index += 1;
        candidate = dir.join(format!("{}_part{}.jsonl", stem, index));
    }
    candidate
}
125
/// Collect every existing file of a session in read order: the base file
/// first (when present), followed by `_part2`, `_part3`, ... up to the first
/// missing number.
///
/// Parts are still listed when the base file itself is absent. A base path
/// without a usable UTF-8 file stem yields at most the base file.
pub fn all_part_paths(base_path: &Path) -> Vec<PathBuf> {
    let mut found: Vec<PathBuf> = Vec::new();
    if base_path.exists() {
        found.push(base_path.to_path_buf());
    }

    if let Some(stem) = base_path.file_stem().and_then(|s| s.to_str()) {
        let dir = base_path.parent().unwrap_or(Path::new("."));
        // Lazily probe numbered parts and stop at the first one that is missing.
        found.extend(
            (2..)
                .map(|index| dir.join(format!("{}_part{}.jsonl", stem, index)))
                .take_while(|candidate| candidate.exists()),
        );
    }

    found
}
152
153/// Total size across all session parts.
154pub fn total_session_size(base_path: &Path) -> u64 {
155    all_part_paths(base_path)
156        .iter()
157        .filter_map(|p| std::fs::metadata(p).ok().map(|m| m.len()))
158        .sum()
159}
160
161// ─── Write ───────────────────────────────────────────────────────────────────
162
163/// Append a transcript entry to the session file.
164/// Automatically forks to a new part file if the current one exceeds 50MB.
165pub fn write_transcript_entry(path: &Path, entry: &TranscriptEntry) -> std::io::Result<()> {
166    use std::io::Write;
167
168    if let Some(parent) = path.parent() {
169        std::fs::create_dir_all(parent)?;
170    }
171
172    let line = serde_json::to_string(entry)?;
173    let mut write_path = current_write_path(path);
174
175    // Check if current part is too large — fork to next part
176    let current_size = std::fs::metadata(&write_path).map(|m| m.len()).unwrap_or(0);
177    if current_size + line.len() as u64 + 1 > MAX_SESSION_SIZE {
178        write_path = next_part_path(path);
179        if let Some(parent) = write_path.parent() {
180            std::fs::create_dir_all(parent)?;
181        }
182    }
183
184    let mut file = std::fs::OpenOptions::new()
185        .create(true)
186        .append(true)
187        .open(&write_path)?;
188
189    writeln!(file, "{}", line)?;
190    Ok(())
191}
192
193/// Write a user message entry.
194pub fn write_user_entry(
195    path: &Path,
196    session_id: &str,
197    message: Message,
198    cwd: &str,
199) -> std::io::Result<String> {
200    let uuid = uuid::Uuid::new_v4().to_string();
201    let entry = TranscriptEntry::User(TranscriptMessage {
202        uuid: uuid.clone(),
203        parent_uuid: None,
204        timestamp: chrono::Utc::now().to_rfc3339(),
205        session_id: session_id.to_string(),
206        cwd: cwd.to_string(),
207        message,
208        is_sidechain: false,
209        extra: HashMap::new(),
210    });
211    write_transcript_entry(path, &entry)?;
212    Ok(uuid)
213}
214
215/// Write an assistant message entry.
216pub fn write_assistant_entry(
217    path: &Path,
218    session_id: &str,
219    message: Message,
220    cwd: &str,
221    parent_uuid: Option<&str>,
222) -> std::io::Result<String> {
223    let uuid = uuid::Uuid::new_v4().to_string();
224    let entry = TranscriptEntry::Assistant(TranscriptMessage {
225        uuid: uuid.clone(),
226        parent_uuid: parent_uuid.map(String::from),
227        timestamp: chrono::Utc::now().to_rfc3339(),
228        session_id: session_id.to_string(),
229        cwd: cwd.to_string(),
230        message,
231        is_sidechain: false,
232        extra: HashMap::new(),
233    });
234    write_transcript_entry(path, &entry)?;
235    Ok(uuid)
236}
237
238/// Write a tombstone (soft-delete) entry.
239pub fn tombstone_entry(path: &Path, deleted_uuid: &str) -> std::io::Result<()> {
240    let entry = TranscriptEntry::Tombstone(TombstoneEntry {
241        deleted_uuid: deleted_uuid.to_string(),
242        timestamp: chrono::Utc::now().to_rfc3339(),
243    });
244    write_transcript_entry(path, &entry)
245}
246
247// ─── Read ────────────────────────────────────────────────────────────────────
248
249/// Load a session transcript from all part files, respecting tombstones.
250pub fn load_transcript(path: &Path) -> Result<Vec<TranscriptEntry>> {
251    let parts = all_part_paths(path);
252    if parts.is_empty() {
253        if !path.exists() {
254            return Ok(Vec::new());
255        }
256        // Base path doesn't match pattern but was passed directly
257        return load_single_transcript(path);
258    }
259
260    // Total size check across all parts
261    let total: u64 = parts
262        .iter()
263        .filter_map(|p| std::fs::metadata(p).ok().map(|m| m.len()))
264        .sum();
265    if total > MAX_TOTAL_SESSION_SIZE {
266        return Err(CerseiError::Config(format!(
267            "Session too large: {} bytes across {} parts (max {})",
268            total,
269            parts.len(),
270            MAX_TOTAL_SESSION_SIZE
271        )));
272    }
273
274    // Concatenate all lines from all parts
275    let mut content = String::new();
276    for part in &parts {
277        content.push_str(&std::fs::read_to_string(part)?);
278    }
279
280    parse_transcript_content(&content)
281}
282
283/// Load from a single file (internal helper).
284fn load_single_transcript(path: &Path) -> Result<Vec<TranscriptEntry>> {
285    let meta = std::fs::metadata(path)?;
286    if meta.len() > MAX_SESSION_SIZE {
287        return Err(CerseiError::Config(format!(
288            "Session file too large: {} bytes (max {})",
289            meta.len(),
290            MAX_SESSION_SIZE
291        )));
292    }
293    let content = std::fs::read_to_string(path)?;
294    parse_transcript_content(&content)
295}
296
297/// Parse transcript content with two-pass tombstone handling.
298fn parse_transcript_content(content: &str) -> Result<Vec<TranscriptEntry>> {
299    // Pass 1: collect tombstone UUIDs
300    let mut tombstones: HashSet<String> = HashSet::new();
301    for line in content.lines() {
302        if line.trim().is_empty() {
303            continue;
304        }
305        if let Ok(entry) = serde_json::from_str::<TranscriptEntry>(line) {
306            if let TranscriptEntry::Tombstone(t) = &entry {
307                tombstones.insert(t.deleted_uuid.clone());
308            }
309        }
310    }
311
312    // Pass 2: load entries, skip tombstoned
313    let mut entries = Vec::new();
314    for line in content.lines() {
315        if line.trim().is_empty() {
316            continue;
317        }
318        let entry: TranscriptEntry = match serde_json::from_str(line) {
319            Ok(e) => e,
320            Err(_) => continue,
321        };
322
323        let uuid = match &entry {
324            TranscriptEntry::User(m) => Some(&m.uuid),
325            TranscriptEntry::Assistant(m) => Some(&m.uuid),
326            TranscriptEntry::System(m) => Some(&m.uuid),
327            TranscriptEntry::Summary(s) => Some(&s.uuid),
328            TranscriptEntry::Tombstone(_) => continue,
329            TranscriptEntry::Unknown => None,
330        };
331
332        if let Some(uuid) = uuid {
333            if tombstones.contains(uuid) {
334                continue;
335            }
336        }
337
338        entries.push(entry);
339    }
340
341    Ok(entries)
342}
343
344/// Extract API messages from transcript entries.
345pub fn messages_from_transcript(entries: &[TranscriptEntry]) -> Vec<Message> {
346    entries
347        .iter()
348        .filter_map(|e| match e {
349            TranscriptEntry::User(m) => Some(m.message.clone()),
350            TranscriptEntry::Assistant(m) => Some(m.message.clone()),
351            TranscriptEntry::System(m) => Some(m.message.clone()),
352            _ => None,
353        })
354        .collect()
355}
356
357// ─── Tests ───────────────────────────────────────────────────────────────────
358
#[cfg(test)]
mod tests {
    use super::*;

    /// Entries come back in the order they were written.
    #[test]
    fn test_write_and_load() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("session.jsonl");

        let uuid1 = write_user_entry(&path, "s1", Message::user("Hello"), "/tmp").unwrap();
        let _uuid2 =
            write_assistant_entry(&path, "s1", Message::assistant("Hi!"), "/tmp", Some(&uuid1))
                .unwrap();
        write_user_entry(&path, "s1", Message::user("How are you?"), "/tmp").unwrap();

        let entries = load_transcript(&path).unwrap();
        assert_eq!(entries.len(), 3);

        let messages = messages_from_transcript(&entries);
        assert_eq!(messages.len(), 3);
        assert_eq!(messages[0].get_text().unwrap(), "Hello");
        assert_eq!(messages[1].get_text().unwrap(), "Hi!");
    }

    /// A tombstone hides the entry it names and is not returned itself.
    #[test]
    fn test_tombstone() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("session.jsonl");

        let _uuid1 = write_user_entry(&path, "s1", Message::user("Keep"), "/tmp").unwrap();
        let uuid2 = write_user_entry(&path, "s1", Message::user("Delete me"), "/tmp").unwrap();
        let _uuid3 = write_user_entry(&path, "s1", Message::user("Also keep"), "/tmp").unwrap();

        tombstone_entry(&path, &uuid2).unwrap();

        let entries = load_transcript(&path).unwrap();
        assert_eq!(entries.len(), 2);

        let messages = messages_from_transcript(&entries);
        assert_eq!(messages.len(), 2);
        assert_eq!(messages[0].get_text().unwrap(), "Keep");
        assert_eq!(messages[1].get_text().unwrap(), "Also keep");
    }

    /// An existing but empty file loads as an empty transcript.
    #[test]
    fn test_empty_file() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("empty.jsonl");
        std::fs::write(&path, "").unwrap();

        let entries = load_transcript(&path).unwrap();
        assert!(entries.is_empty());
    }

    /// Lines that are not valid entries are skipped instead of failing the load.
    #[test]
    fn test_malformed_lines_skipped() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("session.jsonl");

        write_user_entry(&path, "s1", Message::user("Valid"), "/tmp").unwrap();
        {
            use std::io::Write;
            let mut f = std::fs::OpenOptions::new()
                .append(true)
                .open(&path)
                .unwrap();
            writeln!(f, "{{not valid json}}").unwrap();
        }
        write_user_entry(&path, "s1", Message::user("Also valid"), "/tmp").unwrap();

        let entries = load_transcript(&path).unwrap();
        assert_eq!(entries.len(), 2);
    }

    #[test]
    fn test_transcript_path() {
        let path = transcript_path(Path::new("/Users/test/project"), "abc-123");
        assert!(path.to_str().unwrap().contains("abc-123.jsonl"));
        assert!(path.to_str().unwrap().contains(".claude"));
    }

    /// Summary entries are loaded alongside regular messages.
    #[test]
    fn test_summary_entry() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("session.jsonl");

        write_user_entry(&path, "s1", Message::user("Msg 1"), "/tmp").unwrap();
        let summary = TranscriptEntry::Summary(SummaryEntry {
            uuid: "sum-1".into(),
            timestamp: chrono::Utc::now().to_rfc3339(),
            session_id: "s1".into(),
            summary: "User asked about X, assistant did Y.".into(),
            messages_compacted: 5,
        });
        write_transcript_entry(&path, &summary).unwrap();
        write_user_entry(&path, "s1", Message::user("Msg 2"), "/tmp").unwrap();

        let entries = load_transcript(&path).unwrap();
        assert_eq!(entries.len(), 3);
    }

    /// Once the base file has reached the size limit, writes move to `_part2`
    /// and stay there.
    #[test]
    fn test_auto_fork_on_size_limit() {
        let tmp = tempfile::tempdir().unwrap();
        let path = tmp.path().join("big.jsonl");
        let part2 = tmp.path().join("big_part2.jsonl");

        // Nothing on disk yet: no parts, and writes would target the base file.
        assert!(all_part_paths(&path).is_empty());
        assert_eq!(current_write_path(&path), path);

        write_user_entry(&path, "s1", Message::user("first"), "/tmp").unwrap();
        assert_eq!(all_part_paths(&path), vec![path.clone()]);
        assert_eq!(current_write_path(&path), path);
        assert_eq!(next_part_path(&path), part2);

        // Extend the base file to the limit without writing 50MB of real data.
        std::fs::OpenOptions::new()
            .write(true)
            .open(&path)
            .unwrap()
            .set_len(MAX_SESSION_SIZE)
            .unwrap();

        // The next entry no longer fits and must open the second part.
        write_user_entry(&path, "s1", Message::user("second"), "/tmp").unwrap();
        assert!(part2.exists());
        assert_eq!(std::fs::metadata(&path).unwrap().len(), MAX_SESSION_SIZE);
        assert_eq!(all_part_paths(&path), vec![path.clone(), part2.clone()]);
        assert_eq!(current_write_path(&path), part2);

        // Later writes keep going to the second part rather than forking again.
        write_user_entry(&path, "s1", Message::user("third"), "/tmp").unwrap();
        assert_eq!(all_part_paths(&path).len(), 2);

        // Read the second part on its own to check what landed there.
        let entries = load_transcript(&part2).unwrap();
        let messages = messages_from_transcript(&entries);
        assert_eq!(messages.len(), 2);
        assert_eq!(messages[0].get_text().unwrap(), "second");
        assert_eq!(messages[1].get_text().unwrap(), "third");
    }

    /// Loading the base path also returns the entries of later parts, in order.
    #[test]
    fn test_multi_part_load() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path().join("multi.jsonl");
        let part2 = tmp.path().join("multi_part2.jsonl");

        // Write to base
        write_user_entry(&base, "s1", Message::user("Part 1 msg"), "/tmp").unwrap();

        // Write directly to part2 (simulating auto-fork)
        write_user_entry(&part2, "s1", Message::user("Part 2 msg"), "/tmp").unwrap();

        // Load should stitch both
        let entries = load_transcript(&base).unwrap();
        assert_eq!(entries.len(), 2);

        let messages = messages_from_transcript(&entries);
        assert_eq!(messages[0].get_text().unwrap(), "Part 1 msg");
        assert_eq!(messages[1].get_text().unwrap(), "Part 2 msg");
    }

    /// A tombstone stored in a later part removes an entry from an earlier part.
    #[test]
    fn test_tombstone_across_parts() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path().join("tomb.jsonl");
        let part2 = tmp.path().join("tomb_part2.jsonl");

        // Message in part 1
        let uuid1 = write_user_entry(&base, "s1", Message::user("Delete me"), "/tmp").unwrap();
        write_user_entry(&base, "s1", Message::user("Keep"), "/tmp").unwrap();

        // Tombstone in part 2 deletes message from part 1
        tombstone_entry(&part2, &uuid1).unwrap();
        write_user_entry(&part2, "s1", Message::user("Also keep"), "/tmp").unwrap();

        let entries = load_transcript(&base).unwrap();
        assert_eq!(entries.len(), 2);

        let messages = messages_from_transcript(&entries);
        assert_eq!(messages[0].get_text().unwrap(), "Keep");
        assert_eq!(messages[1].get_text().unwrap(), "Also keep");
    }

    #[test]
    fn test_total_session_size() {
        let tmp = tempfile::tempdir().unwrap();
        let base = tmp.path().join("sized.jsonl");

        // A session that does not exist has no size.
        assert_eq!(total_session_size(&base), 0);

        write_user_entry(&base, "s1", Message::user("Hello"), "/tmp").unwrap();
        let size = total_session_size(&base);
        assert!(size > 0);
        assert_eq!(size, std::fs::metadata(&base).unwrap().len());
    }
}