Skip to main content

cc_token_usage/data/
parser.rs

1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use cc_session_jsonl::types::{ApiMessage, AssistantEntry, ContentBlock, Entry, UserEntry};
9
10use super::models::{
11    AttributionData, CollapseCommit, CollapseSnapshot, DataQuality, HookUsage, PrLinkInfo,
12    SessionMetadata, TokenUsage, ValidatedTurn,
13};
14
15// ─── Pipeline Stage 1: JSON Parse (now via cc-session-jsonl) ──────────────
16
17fn parse_line(line: &str) -> Option<Entry> {
18    cc_session_jsonl::parse_entry(line).ok()
19}
20
21// ─── Pipeline Stage 2: Type Filter + User Text Extraction ──────────────────
22
23/// Extract user message text (truncated to 500 chars) for pairing with assistant turns.
24fn extract_user_text(user_entry: &UserEntry) -> Option<String> {
25    let content_val = user_entry.message.as_ref()?.content.as_ref()?;
26
27    let text = if let Some(s) = content_val.as_str() {
28        s.to_string()
29    } else if let Some(arr) = content_val.as_array() {
30        arr.iter()
31            .filter_map(|b| {
32                if b.get("type").and_then(|t| t.as_str()) == Some("text") {
33                    b.get("text")
34                        .and_then(|t| t.as_str())
35                        .map(|s| s.to_string())
36                } else {
37                    None
38                }
39            })
40            .collect::<Vec<_>>()
41            .join("\n")
42    } else {
43        return None;
44    };
45
46    if text.is_empty() {
47        return None;
48    }
49
50    Some(if text.len() > 500 {
51        format!("{}...", &text[..text.floor_char_boundary(500)])
52    } else {
53        text
54    })
55}
56
57// ─── Pipeline Stage 3: Validation ──────────────────────────────────────────
58
/// Why `validate_assistant` rejected an assistant entry.
///
/// Derives `Debug` so a `Result<_, FilterReason>` can be `unwrap`ped or
/// `expect_err`'d, and `PartialEq`/`Eq`/`Copy` so reasons can be compared and
/// passed around freely in assertions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FilterReason {
    /// The entry carries no `message` payload.
    NoApiMessage,
    /// The entry is flagged as a sidechain in a non-agent file.
    Sidechain,
    /// The model is the `<synthetic>` placeholder.
    Synthetic,
    /// The API message has no model.
    NoModel,
    /// The API message has no usage block.
    NoUsage,
    /// Input, output, cache-creation and cache-read token counts sum to zero.
    ZeroUsage,
    /// The timestamp is missing, empty, unparseable, or later than "now".
    InvalidTimestamp,
}
68
/// Fields of an assistant entry that passed every check in `validate_assistant`.
struct ValidatedFields {
    /// Entry uuid; empty string when the entry had none.
    uuid: String,
    /// Request id of the entry, if present.
    request_id: Option<String>,
    /// Parsed entry timestamp; never later than the `now` passed to validation.
    timestamp: DateTime<Utc>,
    /// Model name from the API message (never `<synthetic>`).
    model: String,
    /// Token usage converted from the library usage type; its token counts sum to non-zero.
    usage: TokenUsage,
    /// Stop reason from the API message, if present.
    stop_reason: Option<String>,
    /// Raw content blocks from the API message, if present.
    content: Option<Vec<ContentBlock>>,
    /// Agent id of the entry, if present.
    agent_id: Option<String>,
    /// `service_tier` copied from the usage block.
    service_tier: Option<String>,
    /// `speed` copied from the usage block.
    speed: Option<String>,
    /// `inference_geo` copied from the usage block.
    inference_geo: Option<String>,
    /// Git branch recorded on the entry, if present.
    git_branch: Option<String>,
    /// Attribution plugin recorded on the entry, if present.
    attribution_plugin: Option<String>,
    /// Attribution skill recorded on the entry, if present.
    attribution_skill: Option<String>,
}
85
86fn validate_assistant(
87    msg: AssistantEntry,
88    is_agent: bool,
89    now: DateTime<Utc>,
90) -> std::result::Result<ValidatedFields, FilterReason> {
91    let api: ApiMessage = msg.message.ok_or(FilterReason::NoApiMessage)?;
92
93    // Sidechain filter (skip for agent files -- they always have isSidechain=true)
94    if !is_agent && msg.is_sidechain == Some(true) {
95        return Err(FilterReason::Sidechain);
96    }
97
98    // Synthetic filter
99    if api.model.as_deref() == Some("<synthetic>") {
100        return Err(FilterReason::Synthetic);
101    }
102
103    let model = api.model.ok_or(FilterReason::NoModel)?;
104    let lib_usage = api.usage.ok_or(FilterReason::NoUsage)?;
105
106    // Non-zero usage
107    let total_tokens = lib_usage.input_tokens.unwrap_or(0)
108        + lib_usage.output_tokens.unwrap_or(0)
109        + lib_usage.cache_creation_input_tokens.unwrap_or(0)
110        + lib_usage.cache_read_input_tokens.unwrap_or(0);
111    if total_tokens == 0 {
112        return Err(FilterReason::ZeroUsage);
113    }
114
115    // Capture service fields before conversion consumes them
116    let service_tier = lib_usage.service_tier.clone();
117    let speed = lib_usage.speed.clone();
118    let inference_geo = lib_usage.inference_geo.clone();
119
120    // Convert to local TokenUsage
121    let usage: TokenUsage = lib_usage.into();
122
123    // Timestamp validation
124    let timestamp_str = msg
125        .timestamp
126        .as_deref()
127        .filter(|s| !s.is_empty())
128        .ok_or(FilterReason::InvalidTimestamp)?;
129    let timestamp: DateTime<Utc> = timestamp_str
130        .parse()
131        .map_err(|_| FilterReason::InvalidTimestamp)?;
132    if timestamp > now {
133        return Err(FilterReason::InvalidTimestamp);
134    }
135
136    Ok(ValidatedFields {
137        uuid: msg.uuid.unwrap_or_default(),
138        request_id: msg.request_id,
139        timestamp,
140        model,
141        usage,
142        stop_reason: api.stop_reason,
143        content: api.content,
144        agent_id: msg.agent_id,
145        service_tier,
146        speed,
147        inference_geo,
148        git_branch: msg.git_branch,
149        attribution_plugin: msg.attribution_plugin,
150        attribution_skill: msg.attribution_skill,
151    })
152}
153
154// ─── Pipeline Stage 4: Content Extraction ──────────────────────────────────
155
/// Summary of an assistant message's content blocks, as produced by `extract_content`.
struct ContentExtraction {
    /// One label per content block, in block order: `"text"`, `"tool_use"`,
    /// `"thinking"`, `"tool_result"`, or `"other"`.
    content_types: Vec<String>,
    /// Text of all text blocks joined with `\n`, cut to roughly 500 bytes plus `...`
    /// when longer; `None` when no text block carried any text.
    assistant_text: Option<String>,
    /// Names of the tool-use blocks that have a name, in block order.
    tool_names: Vec<String>,
    /// Number of tool-result blocks whose `is_error` is `Some(true)`.
    tool_error_count: usize,
}
163
164fn extract_content(content: &Option<Vec<ContentBlock>>) -> ContentExtraction {
165    let mut content_types = Vec::new();
166    let mut text_parts = Vec::new();
167    let mut tool_names = Vec::new();
168    let mut tool_error_count = 0usize;
169
170    if let Some(blocks) = content {
171        for b in blocks {
172            match b {
173                ContentBlock::Text { text } => {
174                    content_types.push("text".to_string());
175                    if let Some(t) = text {
176                        text_parts.push(t.clone());
177                    }
178                }
179                ContentBlock::ToolUse { name, .. } => {
180                    content_types.push("tool_use".to_string());
181                    if let Some(n) = name {
182                        tool_names.push(n.clone());
183                    }
184                }
185                ContentBlock::Thinking { .. } => {
186                    content_types.push("thinking".to_string());
187                }
188                ContentBlock::ToolResult { is_error, .. } => {
189                    content_types.push("tool_result".to_string());
190                    if *is_error == Some(true) {
191                        tool_error_count += 1;
192                    }
193                }
194                _ => {
195                    content_types.push("other".to_string());
196                }
197            }
198        }
199    }
200
201    let assistant_text = if text_parts.is_empty() {
202        None
203    } else {
204        let full = text_parts.join("\n");
205        Some(if full.len() > 500 {
206            format!("{}...", &full[..full.floor_char_boundary(500)])
207        } else {
208            full
209        })
210    };
211
212    ContentExtraction {
213        content_types,
214        assistant_text,
215        tool_names,
216        tool_error_count,
217    }
218}
219
220// ─── Pipeline Stage 5: Streaming Deduplication ─────────────────────────────
221
222fn dedup_by_request_id(turns: Vec<ValidatedTurn>) -> (Vec<ValidatedTurn>, usize) {
223    let mut result = Vec::with_capacity(turns.len());
224    let mut request_id_index: HashMap<String, usize> = HashMap::new();
225    let mut dup_count = 0;
226
227    for turn in turns {
228        let rid = turn.request_id.clone().unwrap_or_default();
229        if !rid.is_empty() {
230            if let Some(&idx) = request_id_index.get(&rid) {
231                result[idx] = turn;
232                dup_count += 1;
233                continue;
234            }
235            request_id_index.insert(rid, result.len());
236        }
237        result.push(turn);
238    }
239
240    (result, dup_count)
241}
242
243// ─── Pipeline Orchestrator ─────────────────────────────────────────────────
244
/// Parse a session JSONL file into validated turns, quality metrics, session
/// metadata, and aggregated hook usage.
///
/// Pipeline: JSON parse → type filter → validation → content extraction → deduplication.
/// Also collects metadata from non-assistant/user entries (titles, tags, mode,
/// PR links, etc.) and aggregates `system`/`stop_hook_summary` entries into
/// per-command `HookUsage` records (Claude Code 2.1.104+).
///
/// The returned `Vec<HookUsage>` is keyed by `hookInfos[].command`. It is
/// always empty for agent files (subagent JSONL files have no system entries
/// in observed data); callers should still accept the value for symmetry.
///
/// `is_agent` is copied onto every returned turn and disables the sidechain
/// filter during validation.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or if reading a line fails.
/// Lines that fail to parse are not errors: they are counted in
/// `DataQuality::skipped_parse_error` and skipped.
pub fn parse_session_file(
    path: &Path,
    is_agent: bool,
) -> Result<(
    Vec<ValidatedTurn>,
    DataQuality,
    SessionMetadata,
    Vec<HookUsage>,
)> {
    let file = File::open(path)
        .with_context(|| format!("failed to open session file: {}", path.display()))?;
    let reader = BufReader::new(file);

    let mut quality = DataQuality::default();
    let mut pre_dedup_turns = Vec::new();
    let mut metadata = SessionMetadata::default();
    // Captured once so every entry's timestamp is compared against the same instant.
    let now = Utc::now();
    // Text of the most recent user entry that has not yet been attached to a turn.
    let mut last_user_text: Option<String> = None;
    // Titles are tracked separately and reconciled after the loop.
    let mut ai_title: Option<String> = None;
    let mut custom_title: Option<String> = None;
    // Hook aggregation: command -> HookUsage accumulator
    let mut hook_acc: HashMap<String, HookUsage> = HashMap::new();

    for line_result in reader.lines() {
        let line =
            line_result.with_context(|| format!("failed to read line from {}", path.display()))?;
        quality.total_lines += 1;

        // Stage 1: JSON parse (via cc-session-jsonl)
        let entry = match parse_line(&line) {
            Some(e) => e,
            None => {
                quality.skipped_parse_error += 1;
                continue;
            }
        };

        // Stage 2: Type filter + metadata collection
        // Only assistant entries fall out of this match; every other arm records
        // what it needs and moves on to the next line.
        let msg = match entry {
            Entry::Assistant(msg) => {
                // Count API errors even for entries that will fail validation
                if msg.api_error.is_some() || msg.error.is_some() {
                    metadata.api_error_count += 1;
                }
                msg
            }
            Entry::User(user_entry) => {
                // Every user entry is counted, whether or not text could be extracted.
                metadata.user_prompt_count += 1;
                if let Some(text) = extract_user_text(&user_entry) {
                    // Overwrites any text that was still waiting for an assistant turn.
                    last_user_text = Some(text);
                }
                continue;
            }
            Entry::AiTitle(t) => {
                if let Some(title) = t.ai_title {
                    ai_title = Some(title);
                }
                continue;
            }
            Entry::CustomTitle(t) => {
                if let Some(title) = t.custom_title {
                    custom_title = Some(title);
                }
                continue;
            }
            Entry::Tag(t) => {
                // Tags keep first-seen order and are stored once each.
                if let Some(tag) = t.tag {
                    if !metadata.tags.contains(&tag) {
                        metadata.tags.push(tag);
                    }
                }
                continue;
            }
            Entry::Mode(m) => {
                if let Some(mode) = m.mode {
                    metadata.mode = Some(mode); // last-wins
                }
                continue;
            }
            Entry::PrLink(pr) => {
                // A link is recorded only when number, URL and repository are all present.
                if let (Some(number), Some(url), Some(repo)) =
                    (pr.pr_number, pr.pr_url, pr.pr_repository)
                {
                    // Avoid duplicate PR links
                    if !metadata
                        .pr_links
                        .iter()
                        .any(|p| p.number == number && p.repository == repo)
                    {
                        metadata.pr_links.push(PrLinkInfo {
                            number,
                            url,
                            repository: repo,
                        });
                    }
                }
                continue;
            }
            Entry::SpeculationAccept(sa) => {
                metadata.speculation_accepts += 1;
                metadata.speculation_time_saved_ms += sa.time_saved_ms.unwrap_or(0.0);
                continue;
            }
            Entry::QueueOperation(qo) => {
                // Operations other than "enqueue"/"dequeue" are ignored.
                match qo.operation.as_deref() {
                    Some("enqueue") => metadata.queue_enqueues += 1,
                    Some("dequeue") => metadata.queue_dequeues += 1,
                    _ => {}
                }
                continue;
            }
            Entry::ContextCollapseCommit(cc) => {
                let collapse_id = cc.collapse_id.unwrap_or_default();
                let summary = cc.summary.unwrap_or_default();
                // Skip commits that carry neither an id nor a summary.
                if !collapse_id.is_empty() || !summary.is_empty() {
                    metadata.collapse_commits.push(CollapseCommit {
                        collapse_id,
                        summary,
                    });
                }
                continue;
            }
            Entry::ContextCollapseSnapshot(cs) => {
                // last-wins semantics for snapshot
                let staged = cs.staged.unwrap_or_default();
                let staged_count = staged.len();
                // Staged items without a risk value are left out of the statistics.
                let risks: Vec<f64> = staged.iter().filter_map(|s| s.risk).collect();
                let avg_risk = if risks.is_empty() {
                    0.0
                } else {
                    risks.iter().sum::<f64>() / risks.len() as f64
                };
                // The fold starts at 0.0, so an empty list yields a max of 0.0.
                let max_risk = risks.iter().cloned().fold(0.0f64, f64::max);
                metadata.collapse_snapshot = Some(CollapseSnapshot {
                    staged_count,
                    avg_risk,
                    max_risk,
                    armed: cs.armed.unwrap_or(false),
                    last_spawn_tokens: cs.last_spawn_tokens.unwrap_or(0),
                });
                continue;
            }
            Entry::System(sys) => {
                // Aggregate stop_hook_summary entries by hookInfos[].command.
                // Hook fields are only present when subtype == "stop_hook_summary"
                // (Claude Code 2.1.104+). Older entries / other subtypes simply
                // have None for hook_infos and fall through.
                if sys.subtype.as_deref() == Some("stop_hook_summary") {
                    // Both flags are read from the entry as a whole, so below they are
                    // applied to every command listed in this entry.
                    let has_errors = sys
                        .hook_errors
                        .as_ref()
                        .is_some_and(|errs| !errs.is_empty());
                    let prevented = sys.prevented_continuation == Some(true);
                    if let Some(infos) = sys.hook_infos {
                        // Spec invariant 4: total invocations should equal
                        // sum(SystemEntry.hookCount where subtype=stop_hook_summary).
                        // On all observed 2.1.104+ samples, `hookCount` matches
                        // `hookInfos.len()` — so per-element +1 below is correct.
                        // If a future Claude Code release decouples the two
                        // (e.g. truncates/samples `hookInfos`), the `invocations`
                        // semantics must be re-evaluated; this debug_assert is
                        // the canary that surfaces such drift immediately in
                        // dev/test builds without paying any release cost.
                        debug_assert_eq!(
                            sys.hook_count.unwrap_or(infos.len() as u64) as usize,
                            infos.len(),
                            "hookCount field disagrees with hookInfos.len() — invocations semantics may need re-evaluation"
                        );
                        for info in infos {
                            let cmd = info.command.unwrap_or_default();
                            // Hooks without a command cannot be keyed and are dropped.
                            if cmd.is_empty() {
                                continue;
                            }
                            let dur = info.duration_ms.unwrap_or(0);
                            let entry = hook_acc.entry(cmd.clone()).or_insert_with(|| HookUsage {
                                command: cmd,
                                invocations: 0,
                                total_duration_ms: 0,
                                error_count: 0,
                                prevented_continuation_count: 0,
                            });
                            entry.invocations += 1;
                            entry.total_duration_ms += dur;
                            if has_errors {
                                entry.error_count += 1;
                            }
                            if prevented {
                                entry.prevented_continuation_count += 1;
                            }
                        }
                    }
                }
                continue;
            }
            Entry::AttributionSnapshot(a) => {
                // last-wins semantics
                let surface = a.surface.unwrap_or_default();
                // file_count is the number of keys in `file_states`; the contribution
                // total sums each value's `claudeContribution` where it is a u64.
                let (file_count, total_contribution) =
                    if let Some(obj) = a.file_states.as_ref().and_then(|v| v.as_object()) {
                        let fc = obj.len();
                        let tc: u64 = obj
                            .values()
                            .filter_map(|v| v.get("claudeContribution")?.as_u64())
                            .sum();
                        (fc, tc)
                    } else {
                        (0, 0)
                    };
                metadata.attribution = Some(AttributionData {
                    surface,
                    file_count,
                    total_claude_contribution: total_contribution,
                    prompt_count: a.prompt_count,
                    escape_count: a.escape_count,
                    permission_prompt_count: a.permission_prompt_count,
                });
                continue;
            }
            // Any other entry kind is ignored without touching the skip counters.
            _ => continue,
        };

        // Stage 3: Validation
        let fields = match validate_assistant(msg, is_agent, now) {
            Ok(f) => f,
            Err(FilterReason::Sidechain) => {
                quality.skipped_sidechain += 1;
                continue;
            }
            Err(FilterReason::Synthetic) => {
                quality.skipped_synthetic += 1;
                continue;
            }
            // All remaining reasons share the generic "invalid" counter.
            Err(_) => {
                quality.skipped_invalid += 1;
                continue;
            }
        };

        // Stage 4: Content extraction
        let extracted = extract_content(&fields.content);

        pre_dedup_turns.push(ValidatedTurn {
            uuid: fields.uuid,
            request_id: fields.request_id,
            timestamp: fields.timestamp,
            model: fields.model,
            usage: fields.usage,
            stop_reason: fields.stop_reason,
            content_types: extracted.content_types,
            is_agent,
            agent_id: fields.agent_id,
            // `take()` hands the pending prompt to this turn only; following turns get
            // `None` until another user entry with text arrives.
            user_text: last_user_text.take(),
            assistant_text: extracted.assistant_text,
            tool_names: extracted.tool_names,
            service_tier: fields.service_tier,
            speed: fields.speed,
            inference_geo: fields.inference_geo,
            tool_error_count: extracted.tool_error_count,
            git_branch: fields.git_branch,
            attribution_plugin: fields.attribution_plugin,
            attribution_skill: fields.attribution_skill,
        });
    }

    // Stage 5: Streaming deduplication
    let (turns, dup_count) = dedup_by_request_id(pre_dedup_turns);
    quality.duplicate_turns = dup_count;
    // Counted after deduplication, so merged duplicates are not included.
    quality.valid_turns = turns.len();

    // Finalize title: custom-title overrides ai-title
    metadata.title = custom_title.or(ai_title);

    // Flatten the hook accumulator into a Vec with stable ordering by command.
    let mut hooks: Vec<HookUsage> = hook_acc.into_values().collect();
    hooks.sort_by(|a, b| a.command.cmp(&b.command));

    Ok((turns, quality, metadata, hooks))
}
534
535#[cfg(test)]
536mod tests {
537    use super::*;
538    use std::io::Write;
539    use tempfile::NamedTempFile;
540
541    const VALID_ASSISTANT: &str = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
542
543    fn write_jsonl(lines: &[&str]) -> NamedTempFile {
544        let mut f = NamedTempFile::new().unwrap();
545        for line in lines {
546            writeln!(f, "{}", line).unwrap();
547        }
548        f.flush().unwrap();
549        f
550    }
551
552    #[test]
553    fn parse_valid_assistant_turn() {
554        let f = write_jsonl(&[VALID_ASSISTANT]);
555        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
556
557        assert_eq!(turns.len(), 1);
558        assert_eq!(quality.valid_turns, 1);
559        assert_eq!(turns[0].model, "claude-opus-4-6");
560        assert_eq!(turns[0].uuid, "u1");
561        assert!(!turns[0].is_agent);
562        assert_eq!(turns[0].content_types, vec!["text"]);
563    }
564
565    #[test]
566    fn filters_synthetic_messages() {
567        let synthetic = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"<synthetic>","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
568        let f = write_jsonl(&[synthetic]);
569        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
570
571        assert_eq!(turns.len(), 0);
572        assert_eq!(quality.skipped_synthetic, 1);
573    }
574
575    #[test]
576    fn filters_zero_usage() {
577        let zero_usage = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":0,"output_tokens":0,"cache_creation_input_tokens":0,"cache_read_input_tokens":0},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
578        let f = write_jsonl(&[zero_usage]);
579        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
580
581        assert_eq!(turns.len(), 0);
582        assert_eq!(quality.skipped_invalid, 1);
583    }
584
585    #[test]
586    fn deduplicates_turns() {
587        let f = write_jsonl(&[VALID_ASSISTANT, VALID_ASSISTANT]);
588        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
589
590        assert_eq!(turns.len(), 1);
591        assert_eq!(quality.duplicate_turns, 1);
592    }
593
594    #[test]
595    fn skips_malformed_lines() {
596        let f = write_jsonl(&["not valid json at all", VALID_ASSISTANT]);
597        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
598
599        assert_eq!(turns.len(), 1);
600        assert_eq!(quality.skipped_parse_error, 1);
601    }
602
603    #[test]
604    fn non_assistant_types_not_counted_as_parse_error() {
605        // Note: "progress" is not a named variant in cc-session-jsonl, it maps to Unknown
606        let progress = r#"{"type":"progress","data":{"type":"hook_progress"},"uuid":"u1","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
607        let system = r#"{"type":"system","subtype":"turn_duration","durationMs":1234,"uuid":"u2","timestamp":"2026-03-16T13:51:19.053Z","sessionId":"s1"}"#;
608        let last_prompt = r#"{"type":"last-prompt","lastPrompt":"hello","sessionId":"s1"}"#;
609        let f = write_jsonl(&[progress, system, last_prompt, VALID_ASSISTANT]);
610        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
611
612        assert_eq!(turns.len(), 1);
613        assert_eq!(
614            quality.skipped_parse_error, 0,
615            "known entry types should not be parse errors"
616        );
617        assert_eq!(quality.total_lines, 4);
618    }
619
620    #[test]
621    fn parses_thinking_content_blocks() {
622        let with_thinking = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"thinking","thinking":"hmm","signature":"sig"},{"type":"text","text":"answer"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
623        let f = write_jsonl(&[with_thinking]);
624        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
625
626        assert_eq!(turns.len(), 1);
627        assert_eq!(quality.valid_turns, 1);
628        assert!(turns[0].content_types.contains(&"thinking".to_string()));
629        assert!(turns[0].content_types.contains(&"text".to_string()));
630    }
631
632    #[test]
633    fn filters_sidechain_turns() {
634        let sidechain = r#"{"type":"assistant","uuid":"u2","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"text","text":"abandoned"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":true,"parentUuid":"p1","requestId":"r2"}"#;
635        let f = write_jsonl(&[sidechain, VALID_ASSISTANT]);
636        let (turns, quality, _meta, _hooks) = parse_session_file(f.path(), false).unwrap();
637
638        assert_eq!(turns.len(), 1, "sidechain turn should be filtered out");
639        assert_eq!(quality.skipped_sidechain, 1);
640        assert_eq!(turns[0].uuid, "u1", "only main-chain turn should remain");
641    }
642
643    // ─── Pipeline unit tests ───────────────────────────────────────────
644
645    #[test]
646    fn dedup_preserves_last_entry() {
647        let t1 = ValidatedTurn {
648            uuid: "u1".into(),
649            request_id: Some("r1".into()),
650            timestamp: "2026-03-16T10:00:00Z".parse().unwrap(),
651            model: "m".into(),
652            usage: Default::default(),
653            stop_reason: None,
654            content_types: vec![],
655            is_agent: false,
656            agent_id: None,
657            user_text: None,
658            assistant_text: Some("first".into()),
659            tool_names: vec![],
660            service_tier: None,
661            speed: None,
662            inference_geo: None,
663            tool_error_count: 0,
664            git_branch: None,
665            attribution_plugin: None,
666            attribution_skill: None,
667        };
668        let t2 = ValidatedTurn {
669            uuid: "u2".into(),
670            request_id: Some("r1".into()),
671            timestamp: "2026-03-16T10:00:01Z".parse().unwrap(),
672            model: "m".into(),
673            usage: Default::default(),
674            stop_reason: None,
675            content_types: vec![],
676            is_agent: false,
677            agent_id: None,
678            user_text: None,
679            assistant_text: Some("second".into()),
680            tool_names: vec![],
681            service_tier: None,
682            speed: None,
683            inference_geo: None,
684            tool_error_count: 0,
685            git_branch: None,
686            attribution_plugin: None,
687            attribution_skill: None,
688        };
689        let (result, dup) = dedup_by_request_id(vec![t1, t2]);
690        assert_eq!(result.len(), 1);
691        assert_eq!(dup, 1);
692        assert_eq!(result[0].assistant_text.as_deref(), Some("second"));
693    }
694
695    #[test]
696    fn extract_content_handles_all_types() {
697        let blocks = vec![
698            ContentBlock::Text {
699                text: Some("hello".into()),
700            },
701            ContentBlock::ToolUse {
702                id: None,
703                name: Some("Bash".into()),
704                input: None,
705            },
706            ContentBlock::Thinking {
707                thinking: Some("hmm".into()),
708                signature: None,
709            },
710            ContentBlock::ToolResult {
711                tool_use_id: None,
712                content: None,
713                is_error: None,
714            },
715            ContentBlock::Other,
716        ];
717        let extracted = extract_content(&Some(blocks));
718        assert_eq!(
719            extracted.content_types,
720            vec!["text", "tool_use", "thinking", "tool_result", "other"]
721        );
722        assert_eq!(extracted.assistant_text.as_deref(), Some("hello"));
723        assert_eq!(extracted.tool_names, vec!["Bash"]);
724        assert_eq!(extracted.tool_error_count, 0);
725    }
726
727    #[test]
728    fn extract_content_counts_tool_errors() {
729        let blocks = vec![
730            ContentBlock::ToolResult {
731                tool_use_id: None,
732                content: None,
733                is_error: Some(true),
734            },
735            ContentBlock::ToolResult {
736                tool_use_id: None,
737                content: None,
738                is_error: Some(false),
739            },
740            ContentBlock::ToolResult {
741                tool_use_id: None,
742                content: None,
743                is_error: Some(true),
744            },
745        ];
746        let extracted = extract_content(&Some(blocks));
747        assert_eq!(extracted.tool_error_count, 2);
748    }
749
750    #[test]
751    fn collects_metadata_from_entries() {
752        let user = r#"{"type":"user","uuid":"u0","sessionId":"s1","message":{"role":"user","content":"hello"}}"#;
753        let ai_title = r#"{"type":"ai-title","sessionId":"s1","aiTitle":"AI Generated Title"}"#;
754        let custom_title =
755            r#"{"type":"custom-title","sessionId":"s1","customTitle":"My Custom Title"}"#;
756        let tag1 = r#"{"type":"tag","sessionId":"s1","tag":"bugfix"}"#;
757        let tag2 = r#"{"type":"tag","sessionId":"s1","tag":"release"}"#;
758        let mode = r#"{"type":"mode","sessionId":"s1","mode":"code"}"#;
759        let pr = r#"{"type":"pr-link","sessionId":"s1","prNumber":42,"prUrl":"https://github.com/user/repo/pull/42","prRepository":"user/repo"}"#;
760        let spec = r#"{"type":"speculation-accept","timestamp":"2026-03-16T10:00:00Z","timeSavedMs":500.0}"#;
761        let enq = r#"{"type":"queue-operation","sessionId":"s1","operation":"enqueue","timestamp":"2026-03-16T10:00:00Z"}"#;
762        let deq = r#"{"type":"queue-operation","sessionId":"s1","operation":"dequeue","timestamp":"2026-03-16T10:00:01Z"}"#;
763
764        let f = write_jsonl(&[
765            user,
766            ai_title,
767            custom_title,
768            tag1,
769            tag2,
770            mode,
771            pr,
772            spec,
773            enq,
774            deq,
775            VALID_ASSISTANT,
776        ]);
777        let (_turns, _quality, meta, _hooks) = parse_session_file(f.path(), false).unwrap();
778
779        // custom-title overrides ai-title
780        assert_eq!(meta.title.as_deref(), Some("My Custom Title"));
781        assert_eq!(meta.tags, vec!["bugfix", "release"]);
782        assert_eq!(meta.mode.as_deref(), Some("code"));
783        assert_eq!(meta.pr_links.len(), 1);
784        assert_eq!(meta.pr_links[0].number, 42);
785        assert_eq!(meta.pr_links[0].repository, "user/repo");
786        assert_eq!(meta.speculation_accepts, 1);
787        assert!((meta.speculation_time_saved_ms - 500.0).abs() < f64::EPSILON);
788        assert_eq!(meta.queue_enqueues, 1);
789        assert_eq!(meta.queue_dequeues, 1);
790        assert_eq!(meta.user_prompt_count, 1);
791    }
792
793    #[test]
794    fn counts_api_errors() {
795        let error_entry = r#"{"type":"assistant","uuid":"err1","timestamp":"2026-03-16T10:00:00Z","sessionId":"s1","apiError":"rate_limit","error":"Rate limited"}"#;
796        let f = write_jsonl(&[error_entry, VALID_ASSISTANT]);
797        let (_turns, _quality, meta, _hooks) = parse_session_file(f.path(), false).unwrap();
798
799        assert_eq!(meta.api_error_count, 1);
800    }
801
802    #[test]
803    fn parser_extracts_attribution_fields_to_turn() {
804        let with_attrib = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1","attributionPlugin":"superpowers","attributionSkill":"superpowers:brainstorming"}"#;
805        let f = write_jsonl(&[with_attrib]);
806        let (turns, _q, _m, _h) = parse_session_file(f.path(), false).unwrap();
807        assert_eq!(turns.len(), 1);
808        assert_eq!(turns[0].attribution_plugin.as_deref(), Some("superpowers"));
809        assert_eq!(
810            turns[0].attribution_skill.as_deref(),
811            Some("superpowers:brainstorming")
812        );
813    }
814
815    #[test]
816    fn parser_aggregates_stop_hook_summary_entries() {
817        // Three stop_hook_summary entries with two distinct commands, one with
818        // errors, one with preventedContinuation=true.
819        let asst = r#"{"type":"assistant","uuid":"u1","timestamp":"2026-03-16T10:00:00Z","message":{"model":"claude-opus-4-6","role":"assistant","stop_reason":"end_turn","usage":{"input_tokens":3,"output_tokens":100,"cache_creation_input_tokens":500,"cache_read_input_tokens":10000},"content":[{"type":"text","text":"hi"}]},"sessionId":"s1","cwd":"/tmp","gitBranch":"","userType":"external","isSidechain":false,"parentUuid":null,"requestId":"r1"}"#;
820        let h1 = r#"{"type":"system","subtype":"stop_hook_summary","hookCount":1,"hookInfos":[{"command":"alpha.sh","durationMs":10}],"hookErrors":[],"preventedContinuation":false,"sessionId":"s1"}"#;
821        let h2 = r#"{"type":"system","subtype":"stop_hook_summary","hookCount":1,"hookInfos":[{"command":"alpha.sh","durationMs":20}],"hookErrors":[{"err":"e"}],"preventedContinuation":true,"sessionId":"s1"}"#;
822        let h3 = r#"{"type":"system","subtype":"stop_hook_summary","hookCount":1,"hookInfos":[{"command":"beta.sh","durationMs":30}],"hookErrors":[],"preventedContinuation":false,"sessionId":"s1"}"#;
823        // A non-hook system entry should be ignored.
824        let irrelevant =
825            r#"{"type":"system","subtype":"turn_duration","durationMs":1234,"sessionId":"s1"}"#;
826        let f = write_jsonl(&[asst, h1, h2, h3, irrelevant]);
827        let (_t, _q, _m, hooks) = parse_session_file(f.path(), false).unwrap();
828        // Hooks are sorted by command name.
829        assert_eq!(hooks.len(), 2);
830        assert_eq!(hooks[0].command, "alpha.sh");
831        assert_eq!(hooks[0].invocations, 2);
832        assert_eq!(hooks[0].total_duration_ms, 30);
833        assert_eq!(hooks[0].error_count, 1);
834        assert_eq!(hooks[0].prevented_continuation_count, 1);
835        assert_eq!(hooks[1].command, "beta.sh");
836        assert_eq!(hooks[1].invocations, 1);
837    }
838}