Skip to main content

cc_token_usage/data/
parser.rs

1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use cc_session_jsonl::types::{ApiMessage, AssistantEntry, ContentBlock, Entry, UserEntry};
9
10use super::models::{
11    AttributionData, CollapseCommit, CollapseSnapshot, DataQuality, PrLinkInfo, SessionMetadata,
12    TokenUsage, ValidatedTurn,
13};
14
15// ─── Pipeline Stage 1: JSON Parse (now via cc-session-jsonl) ──────────────
16
17fn parse_line(line: &str) -> Option<Entry> {
18    cc_session_jsonl::parse_entry(line).ok()
19}
20
21// ─── Pipeline Stage 2: Type Filter + User Text Extraction ──────────────────
22
23/// Extract user message text (truncated to 500 chars) for pairing with assistant turns.
24fn extract_user_text(user_entry: &UserEntry) -> Option<String> {
25    let content_val = user_entry.message.as_ref()?.content.as_ref()?;
26
27    let text = if let Some(s) = content_val.as_str() {
28        s.to_string()
29    } else if let Some(arr) = content_val.as_array() {
30        arr.iter()
31            .filter_map(|b| {
32                if b.get("type").and_then(|t| t.as_str()) == Some("text") {
33                    b.get("text")
34                        .and_then(|t| t.as_str())
35                        .map(|s| s.to_string())
36                } else {
37                    None
38                }
39            })
40            .collect::<Vec<_>>()
41            .join("\n")
42    } else {
43        return None;
44    };
45
46    if text.is_empty() {
47        return None;
48    }
49
50    Some(if text.len() > 500 {
51        format!("{}...", &text[..text.floor_char_boundary(500)])
52    } else {
53        text
54    })
55}
56
57// ─── Pipeline Stage 3: Validation ──────────────────────────────────────────
58
/// Why `validate_assistant` rejected an assistant entry.
///
/// The derives make the reason printable and comparable, so a
/// `Result<_, FilterReason>` can be used with `unwrap`/`expect` and in
/// assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FilterReason {
    /// The entry carried no `message` payload.
    NoApiMessage,
    /// The entry is flagged as a sidechain and the file is not an agent file.
    Sidechain,
    /// The model name is the literal `<synthetic>`.
    Synthetic,
    /// The message has no model name.
    NoModel,
    /// The message has no usage block.
    NoUsage,
    /// Input, output, cache-creation and cache-read token counts sum to zero.
    ZeroUsage,
    /// The timestamp is missing, empty, unparseable, or later than "now".
    InvalidTimestamp,
}
68
69struct ValidatedFields {
70    uuid: String,
71    request_id: Option<String>,
72    timestamp: DateTime<Utc>,
73    model: String,
74    usage: TokenUsage,
75    stop_reason: Option<String>,
76    content: Option<Vec<ContentBlock>>,
77    agent_id: Option<String>,
78    service_tier: Option<String>,
79    speed: Option<String>,
80    inference_geo: Option<String>,
81    git_branch: Option<String>,
82}
83
84fn validate_assistant(
85    msg: AssistantEntry,
86    is_agent: bool,
87    now: DateTime<Utc>,
88) -> std::result::Result<ValidatedFields, FilterReason> {
89    let api: ApiMessage = msg.message.ok_or(FilterReason::NoApiMessage)?;
90
91    // Sidechain filter (skip for agent files -- they always have isSidechain=true)
92    if !is_agent && msg.is_sidechain == Some(true) {
93        return Err(FilterReason::Sidechain);
94    }
95
96    // Synthetic filter
97    if api.model.as_deref() == Some("<synthetic>") {
98        return Err(FilterReason::Synthetic);
99    }
100
101    let model = api.model.ok_or(FilterReason::NoModel)?;
102    let lib_usage = api.usage.ok_or(FilterReason::NoUsage)?;
103
104    // Non-zero usage
105    let total_tokens = lib_usage.input_tokens.unwrap_or(0)
106        + lib_usage.output_tokens.unwrap_or(0)
107        + lib_usage.cache_creation_input_tokens.unwrap_or(0)
108        + lib_usage.cache_read_input_tokens.unwrap_or(0);
109    if total_tokens == 0 {
110        return Err(FilterReason::ZeroUsage);
111    }
112
113    // Capture service fields before conversion consumes them
114    let service_tier = lib_usage.service_tier.clone();
115    let speed = lib_usage.speed.clone();
116    let inference_geo = lib_usage.inference_geo.clone();
117
118    // Convert to local TokenUsage
119    let usage: TokenUsage = lib_usage.into();
120
121    // Timestamp validation
122    let timestamp_str = msg
123        .timestamp
124        .as_deref()
125        .filter(|s| !s.is_empty())
126        .ok_or(FilterReason::InvalidTimestamp)?;
127    let timestamp: DateTime<Utc> = timestamp_str
128        .parse()
129        .map_err(|_| FilterReason::InvalidTimestamp)?;
130    if timestamp > now {
131        return Err(FilterReason::InvalidTimestamp);
132    }
133
134    Ok(ValidatedFields {
135        uuid: msg.uuid.unwrap_or_default(),
136        request_id: msg.request_id,
137        timestamp,
138        model,
139        usage,
140        stop_reason: api.stop_reason,
141        content: api.content,
142        agent_id: msg.agent_id,
143        service_tier,
144        speed,
145        inference_geo,
146        git_branch: msg.git_branch,
147    })
148}
149
150// ─── Pipeline Stage 4: Content Extraction ──────────────────────────────────
151
/// Summary of an assistant message's content blocks, as produced by
/// `extract_content`.
struct ContentExtraction {
    /// One label per block, in block order: `text`, `tool_use`, `thinking`,
    /// `tool_result`, or `other`.
    content_types: Vec<String>,
    /// Text blocks joined with newlines and cut to 500 bytes (plus `...`);
    /// `None` when no block carried text.
    assistant_text: Option<String>,
    /// Names of the tools invoked by `tool_use` blocks that have a name.
    tool_names: Vec<String>,
    /// Number of `tool_result` blocks whose `is_error` is `Some(true)`.
    tool_error_count: usize,
}
159
160fn extract_content(content: &Option<Vec<ContentBlock>>) -> ContentExtraction {
161    let mut content_types = Vec::new();
162    let mut text_parts = Vec::new();
163    let mut tool_names = Vec::new();
164    let mut tool_error_count = 0usize;
165
166    if let Some(blocks) = content {
167        for b in blocks {
168            match b {
169                ContentBlock::Text { text } => {
170                    content_types.push("text".to_string());
171                    if let Some(t) = text {
172                        text_parts.push(t.clone());
173                    }
174                }
175                ContentBlock::ToolUse { name, .. } => {
176                    content_types.push("tool_use".to_string());
177                    if let Some(n) = name {
178                        tool_names.push(n.clone());
179                    }
180                }
181                ContentBlock::Thinking { .. } => {
182                    content_types.push("thinking".to_string());
183                }
184                ContentBlock::ToolResult { is_error, .. } => {
185                    content_types.push("tool_result".to_string());
186                    if *is_error == Some(true) {
187                        tool_error_count += 1;
188                    }
189                }
190                _ => {
191                    content_types.push("other".to_string());
192                }
193            }
194        }
195    }
196
197    let assistant_text = if text_parts.is_empty() {
198        None
199    } else {
200        let full = text_parts.join("\n");
201        Some(if full.len() > 500 {
202            format!("{}...", &full[..full.floor_char_boundary(500)])
203        } else {
204            full
205        })
206    };
207
208    ContentExtraction {
209        content_types,
210        assistant_text,
211        tool_names,
212        tool_error_count,
213    }
214}
215
216// ─── Pipeline Stage 5: Streaming Deduplication ─────────────────────────────
217
218fn dedup_by_request_id(turns: Vec<ValidatedTurn>) -> (Vec<ValidatedTurn>, usize) {
219    let mut result = Vec::with_capacity(turns.len());
220    let mut request_id_index: HashMap<String, usize> = HashMap::new();
221    let mut dup_count = 0;
222
223    for turn in turns {
224        let rid = turn.request_id.clone().unwrap_or_default();
225        if !rid.is_empty() {
226            if let Some(&idx) = request_id_index.get(&rid) {
227                result[idx] = turn;
228                dup_count += 1;
229                continue;
230            }
231            request_id_index.insert(rid, result.len());
232        }
233        result.push(turn);
234    }
235
236    (result, dup_count)
237}
238
239// ─── Pipeline Orchestrator ─────────────────────────────────────────────────
240
241/// Parse a session JSONL file into validated turns, quality metrics, and session metadata.
242///
243/// Pipeline: JSON parse → type filter → validation → content extraction → deduplication.
244/// Also collects metadata from non-assistant/user entries (titles, tags, mode, PR links, etc.).
245pub fn parse_session_file(
246    path: &Path,
247    is_agent: bool,
248) -> Result<(Vec<ValidatedTurn>, DataQuality, SessionMetadata)> {
249    let file = File::open(path)
250        .with_context(|| format!("failed to open session file: {}", path.display()))?;
251    let reader = BufReader::new(file);
252
253    let mut quality = DataQuality::default();
254    let mut pre_dedup_turns = Vec::new();
255    let mut metadata = SessionMetadata::default();
256    let now = Utc::now();
257    let mut last_user_text: Option<String> = None;
258    let mut ai_title: Option<String> = None;
259    let mut custom_title: Option<String> = None;
260
261    for line_result in reader.lines() {
262        let line =
263            line_result.with_context(|| format!("failed to read line from {}", path.display()))?;
264        quality.total_lines += 1;
265
266        // Stage 1: JSON parse (via cc-session-jsonl)
267        let entry = match parse_line(&line) {
268            Some(e) => e,
269            None => {
270                quality.skipped_parse_error += 1;
271                continue;
272            }
273        };
274
275        // Stage 2: Type filter + metadata collection
276        let msg = match entry {
277            Entry::Assistant(msg) => {
278                // Count API errors even for entries that will fail validation
279                if msg.api_error.is_some() || msg.error.is_some() {
280                    metadata.api_error_count += 1;
281                }
282                msg
283            }
284            Entry::User(user_entry) => {
285                metadata.user_prompt_count += 1;
286                if let Some(text) = extract_user_text(&user_entry) {
287                    last_user_text = Some(text);
288                }
289                continue;
290            }
291            Entry::AiTitle(t) => {
292                if let Some(title) = t.ai_title {
293                    ai_title = Some(title);
294                }
295                continue;
296            }
297            Entry::CustomTitle(t) => {
298                if let Some(title) = t.custom_title {
299                    custom_title = Some(title);
300                }
301                continue;
302            }
303            Entry::Tag(t) => {
304                if let Some(tag) = t.tag {
305                    if !metadata.tags.contains(&tag) {
306                        metadata.tags.push(tag);
307                    }
308                }
309                continue;
310            }
311            Entry::Mode(m) => {
312                if let Some(mode) = m.mode {
313                    metadata.mode = Some(mode); // last-wins
314                }
315                continue;
316            }
317            Entry::PrLink(pr) => {
318                if let (Some(number), Some(url), Some(repo)) =
319                    (pr.pr_number, pr.pr_url, pr.pr_repository)
320                {
321                    // Avoid duplicate PR links
322                    if !metadata
323                        .pr_links
324                        .iter()
325                        .any(|p| p.number == number && p.repository == repo)
326                    {
327                        metadata.pr_links.push(PrLinkInfo {
328                            number,
329                            url,
330                            repository: repo,
331                        });
332                    }
333                }
334                continue;
335            }
336            Entry::SpeculationAccept(sa) => {
337                metadata.speculation_accepts += 1;
338                metadata.speculation_time_saved_ms += sa.time_saved_ms.unwrap_or(0.0);
339                continue;
340            }
341            Entry::QueueOperation(qo) => {
342                match qo.operation.as_deref() {
343                    Some("enqueue") => metadata.queue_enqueues += 1,
344                    Some("dequeue") => metadata.queue_dequeues += 1,
345                    _ => {}
346                }
347                continue;
348            }
349            Entry::ContextCollapseCommit(cc) => {
350                let collapse_id = cc.collapse_id.unwrap_or_default();
351                let summary = cc.summary.unwrap_or_default();
352                if !collapse_id.is_empty() || !summary.is_empty() {
353                    metadata.collapse_commits.push(CollapseCommit {
354                        collapse_id,
355                        summary,
356                    });
357                }
358                continue;
359            }
360            Entry::ContextCollapseSnapshot(cs) => {
361                // last-wins semantics for snapshot
362                let staged = cs.staged.unwrap_or_default();
363                let staged_count = staged.len();
364                let risks: Vec<f64> = staged.iter().filter_map(|s| s.risk).collect();
365                let avg_risk = if risks.is_empty() {
366                    0.0
367                } else {
368                    risks.iter().sum::<f64>() / risks.len() as f64
369                };
370                let max_risk = risks.iter().cloned().fold(0.0f64, f64::max);
371                metadata.collapse_snapshot = Some(CollapseSnapshot {
372                    staged_count,
373                    avg_risk,
374                    max_risk,
375                    armed: cs.armed.unwrap_or(false),
376                    last_spawn_tokens: cs.last_spawn_tokens.unwrap_or(0),
377                });
378                continue;
379            }
380            Entry::AttributionSnapshot(a) => {
381                // last-wins semantics
382                let surface = a.surface.unwrap_or_default();
383                let (file_count, total_contribution) =
384                    if let Some(obj) = a.file_states.as_ref().and_then(|v| v.as_object()) {
385                        let fc = obj.len();
386                        let tc: u64 = obj
387                            .values()
388                            .filter_map(|v| v.get("claudeContribution")?.as_u64())
389                            .sum();
390                        (fc, tc)
391                    } else {
392                        (0, 0)
393                    };
394                metadata.attribution = Some(AttributionData {
395                    surface,
396                    file_count,
397                    total_claude_contribution: total_contribution,
398                    prompt_count: a.prompt_count,
399                    escape_count: a.escape_count,
400                    permission_prompt_count: a.permission_prompt_count,
401                });
402                continue;
403            }
404            _ => continue,
405        };
406
407        // Stage 3: Validation
408        let fields = match validate_assistant(msg, is_agent, now) {
409            Ok(f) => f,
410            Err(FilterReason::Sidechain) => {
411                quality.skipped_sidechain += 1;
412                continue;
413            }
414            Err(FilterReason::Synthetic) => {
415                quality.skipped_synthetic += 1;
416                continue;
417            }
418            Err(_) => {
419                quality.skipped_invalid += 1;
420                continue;
421            }
422        };
423
424        // Stage 4: Content extraction
425        let extracted = extract_content(&fields.content);
426
427        pre_dedup_turns.push(ValidatedTurn {
428            uuid: fields.uuid,
429            request_id: fields.request_id,
430            timestamp: fields.timestamp,
431            model: fields.model,
432            usage: fields.usage,
433            stop_reason: fields.stop_reason,
434            content_types: extracted.content_types,
435            is_agent,
436            agent_id: fields.agent_id,
437            user_text: last_user_text.take(),
438            assistant_text: extracted.assistant_text,
439            tool_names: extracted.tool_names,
440            service_tier: fields.service_tier,
441            speed: fields.speed,
442            inference_geo: fields.inference_geo,
443            tool_error_count: extracted.tool_error_count,
444            git_branch: fields.git_branch,
445        });
446    }
447
448    // Stage 5: Streaming deduplication
449    let (turns, dup_count) = dedup_by_request_id(pre_dedup_turns);
450    quality.duplicate_turns = dup_count;
451    quality.valid_turns = turns.len();
452
453    // Finalize title: custom-title overrides ai-title
454    metadata.title = custom_title.or(ai_title);
455
456    Ok((turns, quality, metadata))
457}
458
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// A main-chain assistant entry that passes every validation stage.
    const VALID_ASSISTANT: &str = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;

    /// Write `lines` to a fresh temp file, one per line, and hand the file back
    /// so it stays alive for the duration of the test.
    fn write_jsonl(lines: &[&str]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        let mut payload = String::new();
        for line in lines {
            payload.push_str(line);
            payload.push('\n');
        }
        file.write_all(payload.as_bytes()).unwrap();
        file.flush().unwrap();
        file
    }

    /// Parse `lines` as a non-agent session file.
    fn parse_main_session(lines: &[&str]) -> (Vec<ValidatedTurn>, DataQuality, SessionMetadata) {
        let file = write_jsonl(lines);
        parse_session_file(file.path(), false).unwrap()
    }

    /// Build a bare turn for request `r1` that differs only in uuid, timestamp
    /// and assistant text.
    fn turn_for_r1(uuid: &str, timestamp: &str, text: &str) -> ValidatedTurn {
        ValidatedTurn {
            uuid: uuid.into(),
            request_id: Some("r1".into()),
            timestamp: timestamp.parse().unwrap(),
            model: "m".into(),
            usage: Default::default(),
            stop_reason: None,
            content_types: vec![],
            is_agent: false,
            agent_id: None,
            user_text: None,
            assistant_text: Some(text.into()),
            tool_names: vec![],
            service_tier: None,
            speed: None,
            inference_geo: None,
            tool_error_count: 0,
            git_branch: None,
        }
    }

    /// Build a tool-result block with only `is_error` populated.
    fn tool_result(is_error: Option<bool>) -> ContentBlock {
        ContentBlock::ToolResult {
            tool_use_id: None,
            content: None,
            is_error,
        }
    }

    #[test]
    fn parse_valid_assistant_turn() {
        let (turns, quality, _meta) = parse_main_session(&[VALID_ASSISTANT]);

        assert_eq!(turns.len(), 1);
        assert_eq!(quality.valid_turns, 1);

        let only = &turns[0];
        assert_eq!(only.model, "claude-opus-4-6");
        assert_eq!(only.uuid, "u1");
        assert!(!only.is_agent);
        assert_eq!(only.content_types, vec!["text"]);
    }

    #[test]
    fn filters_synthetic_messages() {
        let synthetic = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"<synthetic>","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
        let (turns, quality, _meta) = parse_main_session(&[synthetic]);

        assert_eq!(turns.len(), 0);
        assert_eq!(quality.skipped_synthetic, 1);
    }

    #[test]
    fn filters_zero_usage() {
        let zero_usage = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
        let (turns, quality, _meta) = parse_main_session(&[zero_usage]);

        assert_eq!(turns.len(), 0);
        assert_eq!(quality.skipped_invalid, 1);
    }

    #[test]
    fn deduplicates_turns() {
        let (turns, quality, _meta) = parse_main_session(&[VALID_ASSISTANT, VALID_ASSISTANT]);

        assert_eq!(turns.len(), 1);
        assert_eq!(quality.duplicate_turns, 1);
    }

    #[test]
    fn skips_malformed_lines() {
        let (turns, quality, _meta) =
            parse_main_session(&["not valid json at all", VALID_ASSISTANT]);

        assert_eq!(turns.len(), 1);
        assert_eq!(quality.skipped_parse_error, 1);
    }

    #[test]
    fn non_assistant_types_not_counted_as_parse_error() {
        // Note: "progress" is not a named variant in cc-session-jsonl, it maps to Unknown
        let progress = r#"{"type":"progress","data":{"type":"hook_progress"},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        let system = r#"{"type":"system","subtype":"turn_duration","durationMs":1234,"uuid":"u2","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
        let last_prompt = r#"{"type":"last-prompt","lastPrompt":"hello","sessionId":"s1"}"#;
        let (turns, quality, _meta) =
            parse_main_session(&[progress, system, last_prompt, VALID_ASSISTANT]);

        assert_eq!(turns.len(), 1);
        assert_eq!(
            quality.skipped_parse_error, 0,
            "known entry types should not be parse errors"
        );
        assert_eq!(quality.total_lines, 4);
    }

    #[test]
    fn parses_thinking_content_blocks() {
        let with_thinking = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"thinking","thinking":"hmm","signature":"sig"},{"type":"text","text":"answer"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
        let (turns, quality, _meta) = parse_main_session(&[with_thinking]);

        assert_eq!(turns.len(), 1);
        assert_eq!(quality.valid_turns, 1);

        let labels = &turns[0].content_types;
        assert!(labels.contains(&"thinking".to_string()));
        assert!(labels.contains(&"text".to_string()));
    }

    #[test]
    fn filters_sidechain_turns() {
        let sidechain = r#"{"type":"assistant","uuid":"u2","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"text","text":"abandoned"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":true,"parentUuid":"p1","requestId":"r2"}"#;
        let (turns, quality, _meta) = parse_main_session(&[sidechain, VALID_ASSISTANT]);

        assert_eq!(turns.len(), 1, "sidechain turn should be filtered out");
        assert_eq!(quality.skipped_sidechain, 1);
        assert_eq!(turns[0].uuid, "u1", "only main-chain turn should remain");
    }

    // ─── Pipeline unit tests ───────────────────────────────────────────

    #[test]
    fn dedup_preserves_last_entry() {
        let earlier = turn_for_r1("u1", "2026-03-16T10:00:00Z", "first");
        let later = turn_for_r1("u2", "2026-03-16T10:00:01Z", "second");

        let (result, dup) = dedup_by_request_id(vec![earlier, later]);

        assert_eq!(result.len(), 1);
        assert_eq!(dup, 1);
        assert_eq!(result[0].assistant_text.as_deref(), Some("second"));
    }

    #[test]
    fn extract_content_handles_all_types() {
        let blocks = vec![
            ContentBlock::Text {
                text: Some("hello".into()),
            },
            ContentBlock::ToolUse {
                id: None,
                name: Some("Bash".into()),
                input: None,
            },
            ContentBlock::Thinking {
                thinking: Some("hmm".into()),
                signature: None,
            },
            tool_result(None),
            ContentBlock::Other,
        ];

        let extracted = extract_content(&Some(blocks));

        assert_eq!(
            extracted.content_types,
            vec!["text", "tool_use", "thinking", "tool_result", "other"]
        );
        assert_eq!(extracted.assistant_text.as_deref(), Some("hello"));
        assert_eq!(extracted.tool_names, vec!["Bash"]);
        assert_eq!(extracted.tool_error_count, 0);
    }

    #[test]
    fn extract_content_counts_tool_errors() {
        let blocks: Vec<ContentBlock> = [Some(true), Some(false), Some(true)]
            .into_iter()
            .map(tool_result)
            .collect();

        let extracted = extract_content(&Some(blocks));

        assert_eq!(extracted.tool_error_count, 2);
    }

    #[test]
    fn collects_metadata_from_entries() {
        let user = r#"{"type":"user","uuid":"u0","sessionId":"s1","message":{"role":"user","content":"hello"}}"#;
        let ai_title = r#"{"type":"ai-title","sessionId":"s1","aiTitle":"AI Generated Title"}"#;
        let custom_title =
            r#"{"type":"custom-title","sessionId":"s1","customTitle":"My Custom Title"}"#;
        let tag1 = r#"{"type":"tag","sessionId":"s1","tag":"bugfix"}"#;
        let tag2 = r#"{"type":"tag","sessionId":"s1","tag":"release"}"#;
        let mode = r#"{"type":"mode","sessionId":"s1","mode":"code"}"#;
        let pr = r#"{"type":"pr-link","sessionId":"s1","prNumber":42,"prUrl":"https://github.com/user/repo/pull/42","prRepository":"user/repo"}"#;
        let spec = r#"{"type":"speculation-accept","timestamp":"2026-03-16T10:00:00Z","timeSavedMs":500.0}"#;
        let enq = r#"{"type":"queue-operation","sessionId":"s1","operation":"enqueue","timestamp":"2026-03-16T10:00:00Z"}"#;
        let deq = r#"{"type":"queue-operation","sessionId":"s1","operation":"dequeue","timestamp":"2026-03-16T10:00:01Z"}"#;

        let (_turns, _quality, meta) = parse_main_session(&[
            user,
            ai_title,
            custom_title,
            tag1,
            tag2,
            mode,
            pr,
            spec,
            enq,
            deq,
            VALID_ASSISTANT,
        ]);

        // custom-title overrides ai-title
        assert_eq!(meta.title.as_deref(), Some("My Custom Title"));
        assert_eq!(meta.tags, vec!["bugfix", "release"]);
        assert_eq!(meta.mode.as_deref(), Some("code"));

        assert_eq!(meta.pr_links.len(), 1);
        let link = &meta.pr_links[0];
        assert_eq!(link.number, 42);
        assert_eq!(link.repository, "user/repo");

        assert_eq!(meta.speculation_accepts, 1);
        assert!((meta.speculation_time_saved_ms - 500.0).abs() < f64::EPSILON);
        assert_eq!(meta.queue_enqueues, 1);
        assert_eq!(meta.queue_dequeues, 1);
        assert_eq!(meta.user_prompt_count, 1);
    }

    #[test]
    fn counts_api_errors() {
        let error_entry = r#"{"type":"assistant","uuid":"err1","timestamp":"2026-03-16T10:00:00Z","sessionId":"s1","apiError":"rate_limit","error":"Rate limited"}"#;
        let (_turns, _quality, meta) = parse_main_session(&[error_entry, VALID_ASSISTANT]);

        assert_eq!(meta.api_error_count, 1);
    }
}