Skip to main content

claude_code_transcripts/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod types;
4
5use std::fs;
6use std::io::{BufRead, BufReader};
7use std::path::{Path, PathBuf};
8
9use serde_json::Value;
10use types::Entry;
11
12// ---------------------------------------------------------------------------
13// Public result type
14// ---------------------------------------------------------------------------
15
16pub struct TranscriptResult {
17    pub path: PathBuf,
18    pub total: usize,
19    pub ok: usize,
20    pub parse_errors: Vec<(usize, String, String)>, // (line, type, err)
21    pub roundtrip_errors: Vec<(usize, String, Vec<Diff>)>,
22    pub io_error: Option<String>,
23}
24
25impl TranscriptResult {
26    pub fn has_errors(&self) -> bool {
27        self.io_error.is_some()
28            || !self.parse_errors.is_empty()
29            || !self.roundtrip_errors.is_empty()
30    }
31
32    pub fn print_report(&self) {
33        println!("Transcript: {}", self.path.display());
34        if let Some(e) = &self.io_error {
35            println!("  IO error: {e}");
36            return;
37        }
38        println!("  Lines:     {}", self.total);
39        println!("  OK:        {}", self.ok);
40        println!("  Parse err: {}", self.parse_errors.len());
41        println!("  RT diff:   {}", self.roundtrip_errors.len());
42
43        if !self.parse_errors.is_empty() {
44            println!("\n  ── Parse errors ──────────────────────────────────────────");
45            for (line, ty, err) in &self.parse_errors {
46                println!("    line {line:>4}  type={ty:30}  {err}");
47            }
48        }
49
50        if !self.roundtrip_errors.is_empty() {
51            println!("\n  ── Roundtrip diffs ───────────────────────────────────────");
52            for (line, ty, diffs) in &self.roundtrip_errors {
53                println!("    line {line:>4}  type={ty}");
54                for d in diffs.iter().take(10) {
55                    println!("      {d}");
56                }
57                if diffs.len() > 10 {
58                    println!("      … ({} more)", diffs.len() - 10);
59                }
60            }
61        }
62    }
63}
64
65// ---------------------------------------------------------------------------
66// Core check logic
67// ---------------------------------------------------------------------------
68
69pub fn check_transcript(path: &Path) -> TranscriptResult {
70    let mut result = TranscriptResult {
71        path: path.to_owned(),
72        total: 0,
73        ok: 0,
74        parse_errors: Vec::new(),
75        roundtrip_errors: Vec::new(),
76        io_error: None,
77    };
78
79    let file = match fs::File::open(path) {
80        Ok(f) => f,
81        Err(e) => {
82            result.io_error = Some(e.to_string());
83            return result;
84        }
85    };
86
87    for (idx, line) in BufReader::new(file).lines().enumerate() {
88        let line = match line {
89            Ok(l) => l,
90            Err(e) => {
91                result.io_error = Some(format!("IO error at line {}: {e}", idx + 1));
92                return result;
93            }
94        };
95        // Strip null bytes (can appear in corrupt JSONL lines) then whitespace.
96        let line: String = line.chars().filter(|c| *c != '\0').collect();
97        let line = line.trim().to_owned();
98        if line.is_empty() {
99            continue;
100        }
101        result.total += 1;
102
103        let raw: Value = match serde_json::from_str(&line) {
104            Ok(v) => v,
105            Err(e) => {
106                result
107                    .parse_errors
108                    .push((idx + 1, "(not json)".into(), e.to_string()));
109                continue;
110            }
111        };
112
113        let entry_type = raw
114            .get("type")
115            .and_then(Value::as_str)
116            .unwrap_or("(no type)")
117            .to_owned();
118
119        let entry: Entry = match serde_json::from_value(raw.clone()) {
120            Ok(e) => e,
121            Err(e) => {
122                result
123                    .parse_errors
124                    .push((idx + 1, entry_type, e.to_string()));
125                continue;
126            }
127        };
128
129        let roundtripped: Value = match serde_json::to_value(&entry) {
130            Ok(v) => v,
131            Err(e) => {
132                result
133                    .parse_errors
134                    .push((idx + 1, entry_type, format!("re-serialize: {e}")));
135                continue;
136            }
137        };
138
139        let diffs = diff_values("", &raw, &roundtripped);
140        if diffs.is_empty() {
141            result.ok += 1;
142        } else {
143            result.roundtrip_errors.push((idx + 1, entry_type, diffs));
144        }
145    }
146
147    result
148}
149
150// ---------------------------------------------------------------------------
151// Recursive value diff
152// ---------------------------------------------------------------------------
153
/// A single human-readable difference found between two JSON values.
///
/// The wrapped string is the complete message; [`Display`](std::fmt::Display)
/// prints it verbatim.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Diff(pub String);

impl std::fmt::Display for Diff {
    /// Writes the wrapped message unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}
161
162pub fn diff_values(path: &str, a: &Value, b: &Value) -> Vec<Diff> {
163    let mut out = Vec::new();
164    diff_inner(path, a, b, &mut out);
165    out
166}
167
168fn diff_inner(path: &str, a: &Value, b: &Value, out: &mut Vec<Diff>) {
169    match (a, b) {
170        (Value::Object(ma), Value::Object(mb)) => {
171            for (k, va) in ma {
172                let child = child_path(path, k);
173                match mb.get(k) {
174                    None => out.push(Diff(format!("missing in output:  {child} = {va}"))),
175                    Some(vb) => diff_inner(&child, va, vb, out),
176                }
177            }
178            for k in mb.keys() {
179                if !ma.contains_key(k) {
180                    let child = child_path(path, k);
181                    out.push(Diff(format!("extra in output:    {child} = {}", mb[k])));
182                }
183            }
184        }
185        (Value::Array(aa), Value::Array(ab)) => {
186            if aa.len() != ab.len() {
187                out.push(Diff(format!(
188                    "array length mismatch at {path}: {} vs {}",
189                    aa.len(),
190                    ab.len()
191                )));
192                return;
193            }
194            for (i, (va, vb)) in aa.iter().zip(ab.iter()).enumerate() {
195                diff_inner(&format!("{path}[{i}]"), va, vb, out);
196            }
197        }
198        (Value::Number(na), Value::Number(nb)) => {
199            let fa = na.as_f64().unwrap_or(f64::NAN);
200            let fb = nb.as_f64().unwrap_or(f64::NAN);
201            if (fa - fb).abs() > f64::EPSILON {
202                out.push(Diff(format!("value mismatch at {path}: {a} vs {b}")));
203            }
204        }
205        _ => {
206            if a != b {
207                let label = if path.is_empty() { "(root)" } else { path };
208                out.push(Diff(format!("value mismatch at {label}: {a} vs {b}")));
209            }
210        }
211    }
212}
213
/// Builds the path of the member `key` under `parent`.
///
/// An empty `parent` yields `key` alone; otherwise the two are joined with `.`.
fn child_path(parent: &str, key: &str) -> String {
    match parent {
        "" => key.to_owned(),
        prefix => format!("{prefix}.{key}"),
    }
}