Skip to main content

chub_core/team/tracking/
checkpoint.rs

1//! Checkpoint storage on orphan git branch.
2//!
3//! Stores checkpoints on `entire/checkpoints/v1` (orphan branch) with
4//! sharded directory structure compatible with entire.io.
5
6use std::fs;
7use std::path::Path;
8use std::process::Command;
9
10use serde::{Deserialize, Serialize};
11
12use super::session_state::SessionState;
13use super::types::{CheckpointID, InitialAttribution, Summary, TokenUsage};
14use crate::util::now_iso8601;
15
16// ---------------------------------------------------------------------------
17// Committed metadata (per-session, stored at <shard>/0/metadata.json)
18// ---------------------------------------------------------------------------
19
20/// Metadata for a single session within a checkpoint.
21/// Compatible with entire.io's `CommittedMetadata`.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23#[serde(rename_all = "camelCase")]
24pub struct CommittedMetadata {
25    #[serde(default, skip_serializing_if = "Option::is_none")]
26    pub cli_version: Option<String>,
27    #[serde(rename = "checkpointID")]
28    pub checkpoint_id: CheckpointID,
29    #[serde(rename = "sessionID")]
30    pub session_id: String,
31    #[serde(default)]
32    pub strategy: String,
33    pub created_at: String,
34    #[serde(default, skip_serializing_if = "Option::is_none")]
35    pub branch: Option<String>,
36    #[serde(default)]
37    pub checkpoints_count: i32,
38    #[serde(default)]
39    pub files_touched: Vec<String>,
40    #[serde(default, skip_serializing_if = "Option::is_none")]
41    pub agent: Option<String>,
42    #[serde(default, skip_serializing_if = "Option::is_none")]
43    #[serde(rename = "turnID")]
44    pub turn_id: Option<String>,
45    #[serde(default)]
46    pub is_task: bool,
47    #[serde(default, skip_serializing_if = "Option::is_none")]
48    #[serde(rename = "toolUseID")]
49    pub tool_use_id: Option<String>,
50    #[serde(default, skip_serializing_if = "Option::is_none")]
51    pub transcript_identifier_at_start: Option<String>,
52    #[serde(default)]
53    pub checkpoint_transcript_start: i64,
54    #[serde(default, skip_serializing_if = "Option::is_none")]
55    pub token_usage: Option<TokenUsage>,
56    #[serde(default, skip_serializing_if = "Option::is_none")]
57    pub summary: Option<Summary>,
58    #[serde(default, skip_serializing_if = "Option::is_none")]
59    pub initial_attribution: Option<InitialAttribution>,
60}
61
62// ---------------------------------------------------------------------------
63// Checkpoint summary (root-level, stored at <shard>/metadata.json)
64// ---------------------------------------------------------------------------
65
66/// Root-level checkpoint summary aggregating all sessions.
67/// Compatible with entire.io's `CheckpointSummary`.
68#[derive(Debug, Clone, Serialize, Deserialize)]
69#[serde(rename_all = "camelCase")]
70pub struct CheckpointSummary {
71    #[serde(default, skip_serializing_if = "Option::is_none")]
72    pub cli_version: Option<String>,
73    #[serde(rename = "checkpointID")]
74    pub checkpoint_id: CheckpointID,
75    #[serde(default)]
76    pub strategy: String,
77    #[serde(default, skip_serializing_if = "Option::is_none")]
78    pub branch: Option<String>,
79    #[serde(default)]
80    pub checkpoints_count: i32,
81    #[serde(default)]
82    pub files_touched: Vec<String>,
83    #[serde(default)]
84    pub sessions: Vec<SessionFilePaths>,
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub token_usage: Option<TokenUsage>,
87}
88
89/// Paths to session files within a checkpoint (relative).
90#[derive(Debug, Clone, Serialize, Deserialize)]
91#[serde(rename_all = "camelCase")]
92pub struct SessionFilePaths {
93    pub metadata: String,
94    pub transcript: String,
95    pub content_hash: String,
96    pub prompt: String,
97}
98
99// ---------------------------------------------------------------------------
100// Checkpoint branch operations
101// ---------------------------------------------------------------------------
102
/// Short name of the git branch that stores checkpoints; call sites prepend
/// `refs/heads/` when they need the full ref name.
const CHECKPOINT_BRANCH: &str = "entire/checkpoints/v1";
104
105/// Create a checkpoint from the current session state.
106/// Stores metadata and transcript on the orphan checkpoint branch.
107pub fn create_checkpoint(
108    state: &SessionState,
109    transcript_path: Option<&Path>,
110    attribution: Option<InitialAttribution>,
111) -> Option<CheckpointID> {
112    let checkpoint_id = CheckpointID::generate();
113
114    // Prepare checkpoint data in a temp directory
115    let tmp_dir = std::env::temp_dir().join(format!("chub-checkpoint-{}", checkpoint_id));
116    let _ = fs::create_dir_all(&tmp_dir);
117
118    let shard_path = checkpoint_id.shard_path();
119    let session_dir = tmp_dir.join(&shard_path).join("0");
120    let _ = fs::create_dir_all(&session_dir);
121
122    // Write committed metadata
123    let metadata = CommittedMetadata {
124        cli_version: Some(env!("CARGO_PKG_VERSION").to_string()),
125        checkpoint_id: checkpoint_id.clone(),
126        session_id: state.session_id.clone(),
127        strategy: "chub-track".to_string(),
128        created_at: now_iso8601(),
129        branch: None,
130        checkpoints_count: state.step_count,
131        files_touched: state.files_touched.clone(),
132        agent: state.agent_type.clone(),
133        turn_id: state.turn_id.clone(),
134        is_task: false,
135        tool_use_id: None,
136        transcript_identifier_at_start: state.transcript_identifier_at_start.clone(),
137        checkpoint_transcript_start: state.checkpoint_transcript_start,
138        token_usage: state.token_usage.clone(),
139        summary: None,
140        initial_attribution: attribution,
141    };
142
143    let meta_json = serde_json::to_string_pretty(&metadata).unwrap_or_default() + "\n";
144    let _ = fs::write(session_dir.join("metadata.json"), &meta_json);
145
146    // Copy transcript if available
147    let transcript_rel = if let Some(tp) = transcript_path {
148        if tp.exists() {
149            let dest = session_dir.join("full.jsonl");
150            let _ = fs::copy(tp, &dest);
151
152            // Write content hash
153            if let Ok(content) = fs::read(tp) {
154                use sha2::{Digest, Sha256};
155                let hash = format!("{:x}", Sha256::digest(&content));
156                let _ = fs::write(session_dir.join("content_hash.txt"), &hash);
157            }
158            "0/full.jsonl".to_string()
159        } else {
160            String::new()
161        }
162    } else {
163        String::new()
164    };
165
166    // Write prompt
167    if let Some(ref prompt) = state.first_prompt {
168        let _ = fs::write(session_dir.join("prompt.txt"), prompt);
169    }
170
171    // Write root checkpoint summary
172    let summary = CheckpointSummary {
173        cli_version: Some(env!("CARGO_PKG_VERSION").to_string()),
174        checkpoint_id: checkpoint_id.clone(),
175        strategy: "chub-track".to_string(),
176        branch: None,
177        checkpoints_count: state.step_count,
178        files_touched: state.files_touched.clone(),
179        sessions: vec![SessionFilePaths {
180            metadata: "0/metadata.json".to_string(),
181            transcript: transcript_rel,
182            content_hash: "0/content_hash.txt".to_string(),
183            prompt: "0/prompt.txt".to_string(),
184        }],
185        token_usage: state.token_usage.clone(),
186    };
187
188    let summary_json = serde_json::to_string_pretty(&summary).unwrap_or_default() + "\n";
189    let root_dir = tmp_dir.join(&shard_path);
190    let _ = fs::write(root_dir.join("metadata.json"), &summary_json);
191
192    // Commit to orphan branch using git
193    let committed =
194        commit_to_checkpoint_branch(&tmp_dir, &shard_path, &state.session_id, &checkpoint_id);
195
196    // Cleanup temp
197    let _ = fs::remove_dir_all(&tmp_dir);
198
199    if committed {
200        Some(checkpoint_id)
201    } else {
202        None
203    }
204}
205
206/// Commit checkpoint data to the orphan branch.
207fn commit_to_checkpoint_branch(
208    tmp_dir: &Path,
209    shard_path: &str,
210    session_id: &str,
211    checkpoint_id: &CheckpointID,
212) -> bool {
213    // Ensure orphan branch exists
214    ensure_checkpoint_branch();
215
216    // Use git worktree or direct tree manipulation
217    // For simplicity, use a temporary checkout approach
218    let worktree_dir = std::env::temp_dir().join(format!("chub-wt-{}", checkpoint_id));
219
220    // Create a temporary worktree for the checkpoint branch
221    let wt_result = Command::new("git")
222        .args(["worktree", "add", "--detach"])
223        .arg(worktree_dir.to_str().unwrap_or(""))
224        .arg(CHECKPOINT_BRANCH)
225        .output();
226
227    if wt_result.is_err() || !wt_result.as_ref().unwrap().status.success() {
228        // Fallback: try without worktree (direct git operations)
229        return commit_direct(tmp_dir, shard_path, session_id, checkpoint_id);
230    }
231
232    // Copy checkpoint files
233    let dest_dir = worktree_dir.join(shard_path);
234    let _ = fs::create_dir_all(&dest_dir);
235    copy_dir_recursive(&tmp_dir.join(shard_path), &dest_dir);
236
237    // Stage and commit
238    let success = Command::new("git")
239        .args(["-C", worktree_dir.to_str().unwrap_or(""), "add", "."])
240        .status()
241        .map(|s| s.success())
242        .unwrap_or(false)
243        && Command::new("git")
244            .args([
245                "-C",
246                worktree_dir.to_str().unwrap_or(""),
247                "commit",
248                "-m",
249                &format!(
250                    "Checkpoint: {}",
251                    &checkpoint_id.0[..12.min(checkpoint_id.0.len())]
252                ),
253            ])
254            .output()
255            .map(|o| o.status.success())
256            .unwrap_or(false);
257
258    // Cleanup worktree
259    let _ = Command::new("git")
260        .args(["worktree", "remove", "--force"])
261        .arg(worktree_dir.to_str().unwrap_or(""))
262        .output();
263
264    success
265}
266
267/// Direct commit approach without worktree (fallback).
268fn commit_direct(
269    tmp_dir: &Path,
270    shard_path: &str,
271    session_id: &str,
272    checkpoint_id: &CheckpointID,
273) -> bool {
274    // Use git hash-object + update-index + write-tree + commit-tree
275    // This is more complex but doesn't require a worktree
276
277    let src_dir = tmp_dir.join(shard_path);
278    if !src_dir.is_dir() {
279        return false;
280    }
281
282    // Get the current tree of the checkpoint branch
283    let parent = Command::new("git")
284        .args(["rev-parse", CHECKPOINT_BRANCH])
285        .output()
286        .ok()
287        .and_then(|o| {
288            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
289            if s.is_empty() || !o.status.success() {
290                None
291            } else {
292                Some(s)
293            }
294        });
295
296    // For each file in the checkpoint, hash it and build a tree
297    let mut blobs: Vec<(String, String)> = Vec::new();
298    collect_files(&src_dir, &src_dir, &mut blobs);
299
300    if blobs.is_empty() {
301        return false;
302    }
303
304    // Hash all blobs
305    let mut index_entries = Vec::new();
306    for (rel_path, abs_path) in &blobs {
307        let hash = Command::new("git")
308            .args(["hash-object", "-w", abs_path])
309            .output()
310            .ok()
311            .and_then(|o| {
312                let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
313                if s.is_empty() {
314                    None
315                } else {
316                    Some(s)
317                }
318            });
319        if let Some(hash) = hash {
320            index_entries.push((format!("{}/{}", shard_path, rel_path), hash));
321        }
322    }
323
324    // Build index and tree using a temporary index
325    let tmp_index = std::env::temp_dir().join(format!("chub-index-{}", checkpoint_id));
326
327    // If we have a parent, read its tree first
328    if let Some(ref parent_hash) = parent {
329        let _ = Command::new("git")
330            .env("GIT_INDEX_FILE", tmp_index.to_str().unwrap_or(""))
331            .args(["read-tree", parent_hash])
332            .output();
333    }
334
335    // Add our entries
336    for (path, hash) in &index_entries {
337        let _ = Command::new("git")
338            .env("GIT_INDEX_FILE", tmp_index.to_str().unwrap_or(""))
339            .args(["update-index", "--add", "--cacheinfo", "100644", hash, path])
340            .output();
341    }
342
343    // Write tree
344    let tree = Command::new("git")
345        .env("GIT_INDEX_FILE", tmp_index.to_str().unwrap_or(""))
346        .args(["write-tree"])
347        .output()
348        .ok()
349        .and_then(|o| {
350            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
351            if s.is_empty() {
352                None
353            } else {
354                Some(s)
355            }
356        });
357
358    let _ = fs::remove_file(&tmp_index);
359
360    let tree = match tree {
361        Some(t) => t,
362        None => return false,
363    };
364
365    // Create commit
366    let msg = format!(
367        "Checkpoint: {}\n\nEntire-Session: {}\nEntire-Strategy: chub-track",
368        &checkpoint_id.0[..12.min(checkpoint_id.0.len())],
369        session_id
370    );
371
372    let mut commit_args = vec!["commit-tree".to_string(), tree];
373    if let Some(ref parent_hash) = parent {
374        commit_args.push("-p".to_string());
375        commit_args.push(parent_hash.clone());
376    }
377    commit_args.push("-m".to_string());
378    commit_args.push(msg);
379
380    let commit = Command::new("git")
381        .args(&commit_args)
382        .output()
383        .ok()
384        .and_then(|o| {
385            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
386            if s.is_empty() {
387                None
388            } else {
389                Some(s)
390            }
391        });
392
393    if let Some(commit_hash) = commit {
394        // Update branch ref
395        Command::new("git")
396            .args([
397                "update-ref",
398                &format!("refs/heads/{}", CHECKPOINT_BRANCH),
399                &commit_hash,
400            ])
401            .output()
402            .map(|o| o.status.success())
403            .unwrap_or(false)
404    } else {
405        false
406    }
407}
408
409/// Ensure the checkpoint orphan branch exists.
410fn ensure_checkpoint_branch() {
411    let exists = Command::new("git")
412        .args(["rev-parse", "--verify", CHECKPOINT_BRANCH])
413        .output()
414        .map(|o| o.status.success())
415        .unwrap_or(false);
416
417    if !exists {
418        // Create orphan branch with empty tree
419        let empty_tree = Command::new("git")
420            .args(["hash-object", "-t", "tree", "/dev/null"])
421            .output()
422            .ok()
423            .and_then(|o| {
424                let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
425                if s.is_empty() {
426                    None
427                } else {
428                    Some(s)
429                }
430            })
431            .unwrap_or_else(|| {
432                // Fallback: create empty tree manually
433                "4b825dc642cb6eb9a060e54bf899d69f7264209e".to_string()
434            });
435
436        let commit = Command::new("git")
437            .args([
438                "commit-tree",
439                &empty_tree,
440                "-m",
441                "Initialize checkpoint branch",
442            ])
443            .output()
444            .ok()
445            .and_then(|o| {
446                let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
447                if s.is_empty() {
448                    None
449                } else {
450                    Some(s)
451                }
452            });
453
454        if let Some(hash) = commit {
455            let _ = Command::new("git")
456                .args([
457                    "update-ref",
458                    &format!("refs/heads/{}", CHECKPOINT_BRANCH),
459                    &hash,
460                ])
461                .output();
462        }
463    }
464}
465
466/// List checkpoints from the orphan branch.
467pub fn list_checkpoints() -> Vec<CheckpointSummary> {
468    let output = Command::new("git")
469        .args(["ls-tree", "-r", "--name-only", CHECKPOINT_BRANCH])
470        .output();
471
472    let output = match output {
473        Ok(o) if o.status.success() => o,
474        _ => return vec![],
475    };
476
477    let text = String::from_utf8_lossy(&output.stdout);
478    let mut summaries = Vec::new();
479    let mut seen_checkpoints: std::collections::HashSet<String> = std::collections::HashSet::new();
480
481    for line in text.lines() {
482        // Look for root metadata.json files: <xx>/<rest>/metadata.json
483        // But NOT <xx>/<rest>/0/metadata.json (those are per-session)
484        let parts: Vec<&str> = line.split('/').collect();
485        if parts.len() == 3 && parts[2] == "metadata.json" {
486            let checkpoint_id = format!("{}{}", parts[0], parts[1]);
487            if seen_checkpoints.insert(checkpoint_id.clone()) {
488                // Read the metadata
489                if let Some(summary) = read_checkpoint_summary(&checkpoint_id) {
490                    summaries.push(summary);
491                }
492            }
493        }
494    }
495
496    summaries
497}
498
499/// Read a checkpoint summary from the orphan branch.
500fn read_checkpoint_summary(checkpoint_id: &str) -> Option<CheckpointSummary> {
501    let id = CheckpointID(checkpoint_id.to_string());
502    let path = format!("{}/metadata.json", id.shard_path());
503
504    let output = Command::new("git")
505        .args(["show", &format!("{}:{}", CHECKPOINT_BRANCH, path)])
506        .output()
507        .ok()?;
508
509    if !output.status.success() {
510        return None;
511    }
512
513    let content = String::from_utf8_lossy(&output.stdout);
514    serde_json::from_str(&content).ok()
515}
516
517// ---------------------------------------------------------------------------
518// Helpers
519// ---------------------------------------------------------------------------
520
/// Best-effort recursive copy of the contents of `src` into `dst`.
///
/// Subdirectories are created on the destination side as they are met. Every
/// I/O error (unreadable directory, failed copy, …) is ignored, so an
/// unreadable `src` simply results in nothing being copied.
fn copy_dir_recursive(src: &Path, dst: &Path) {
    let listing = match fs::read_dir(src) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    for child in listing.filter_map(Result::ok) {
        let from = child.path();
        let to = dst.join(child.file_name());

        if !from.is_dir() {
            let _ = fs::copy(&from, &to);
            continue;
        }

        let _ = fs::create_dir_all(&to);
        copy_dir_recursive(&from, &to);
    }
}
535
/// Recursively gather every non-directory entry below `dir` into `out`.
///
/// Each pushed pair is `(path relative to base with '/' separators, full path)`.
/// Directories that cannot be read and entries that are not located under
/// `base` are skipped silently.
fn collect_files(base: &Path, dir: &Path, out: &mut Vec<(String, String)>) {
    let listing = match fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    for child in listing.filter_map(Result::ok) {
        let full = child.path();

        if full.is_dir() {
            collect_files(base, &full, out);
            continue;
        }

        if let Ok(relative) = full.strip_prefix(base) {
            // Normalize Windows separators so the relative path is usable as a git path.
            let relative = relative.to_string_lossy().replace('\\', "/");
            let absolute = full.to_string_lossy().into_owned();
            out.push((relative, absolute));
        }
    }
}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialized metadata must use the entire.io-compatible key names and
    /// survive a JSON round trip.
    #[test]
    fn committed_metadata_json_compat() {
        let usage = TokenUsage {
            input_tokens: 1000,
            output_tokens: 500,
            ..Default::default()
        };

        let original = CommittedMetadata {
            cli_version: Some("0.1.15".to_string()),
            checkpoint_id: CheckpointID("a3b2c4d5e6f7".to_string()),
            session_id: "2026-03-22-abc12345".to_string(),
            strategy: "chub-track".to_string(),
            created_at: "2026-03-22T10:00:00.000Z".to_string(),
            branch: None,
            checkpoints_count: 1,
            files_touched: vec!["src/main.rs".to_string()],
            agent: Some("Claude Code".to_string()),
            turn_id: None,
            is_task: false,
            tool_use_id: None,
            transcript_identifier_at_start: None,
            checkpoint_transcript_start: 0,
            token_usage: Some(usage),
            summary: None,
            initial_attribution: None,
        };

        let encoded = serde_json::to_string_pretty(&original).unwrap();

        // camelCase / explicit-ID keys and values expected by entire.io.
        let expected_fragments = [
            "\"checkpointID\"",
            "\"sessionID\"",
            "\"filesTouched\"",
            "\"Claude Code\"",
            "\"inputTokens\"",
        ];
        for fragment in expected_fragments {
            assert!(encoded.contains(fragment));
        }

        // Round trip back into the struct.
        let decoded: CommittedMetadata = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.checkpoint_id.0, "a3b2c4d5e6f7");
    }
}