clawgs 0.2.0

Extract structured JSON snapshots from Claude Code and Codex JSONL transcripts
Documentation
use std::path::Path;

use anyhow::Result;
use serde_json::Value;

use super::{
    extract_timestamp, extract_tool_detail, push_action, read_jsonl, truncate, ParseSnapshot,
};
use crate::{Action, ExtractOptions};

/// Parses the JSONL transcript at `path` into a [`ParseSnapshot`].
///
/// The file is loaded through `read_jsonl` (raw events are requested when
/// `options.include_raw` is set), then every entry is visited in order to
/// update the rolling state: the user task text, the token count, the
/// awaiting-user flags, and the recorded actions.
///
/// `commit_signal` is always `None` in the snapshot produced here.
///
/// # Errors
///
/// Propagates any error returned by `read_jsonl`.
pub(crate) fn parse(path: &Path, options: &ExtractOptions) -> Result<ParseSnapshot> {
    let parsed = read_jsonl(path, options.include_raw)?;

    // Rolling state; each entry may overwrite any of these.
    let mut task: Option<String> = None;
    let mut actions: Vec<Action> = Vec::new();
    let mut latest_tool: Option<Action> = None;
    let mut tokens: u64 = 0;
    let mut awaiting = false;
    let mut awaiting_text: Option<String> = None;

    for entry in &parsed.entries {
        let ts = extract_timestamp(entry);

        update_user_task(entry, options, &mut task);
        update_token_count(entry, &mut tokens);
        update_awaiting_user_state(entry, &mut awaiting, &mut awaiting_text);

        let entry_actions = assistant_actions(entry, options, &ts);
        record_actions(
            &mut actions,
            &mut latest_tool,
            entry_actions,
            options.max_actions,
        );
    }

    // Taken before the remaining fields of `parsed` are moved into the snapshot.
    let events_seen = parsed.entries.len() as u64;

    Ok(ParseSnapshot {
        user_task: task,
        recent_actions: actions,
        current_tool: latest_tool,
        token_count: tokens,
        awaiting_user_input: awaiting,
        awaiting_user_text: awaiting_text,
        commit_signal: None,
        events_seen,
        malformed_lines_skipped: parsed.malformed_lines_skipped,
        bytes_read: parsed.bytes_read,
        raw_events: parsed.raw_events,
    })
}

/// Returns the entry's top-level `"type"` string, or `""` when the field is
/// missing or is not a JSON string.
fn entry_type(entry: &Value) -> &str {
    match entry.get("type") {
        Some(Value::String(kind)) => kind.as_str(),
        _ => "",
    }
}

fn message(entry: &Value) -> Option<&Value> {
    entry.get("message")
}

/// Overwrites `user_task` when `entry` is a `"user"` entry that carries
/// extractable user text.
///
/// The text is passed through `truncate` with `options.max_task_chars` before
/// being stored. Entries of any other type, or user entries without text,
/// leave `user_task` untouched.
fn update_user_task(entry: &Value, options: &ExtractOptions, user_task: &mut Option<String>) {
    if entry_type(entry) != "user" {
        return;
    }

    let Some(raw_text) = extract_user_text(message(entry)) else {
        return;
    };

    *user_task = Some(truncate(&raw_text, options.max_task_chars));
}

/// Replaces `token_count` with the assistant message's `usage.input_tokens`
/// when that field is present as an unsigned integer.
///
/// The value is overwritten, not accumulated.
fn update_token_count(entry: &Value, token_count: &mut u64) {
    let Some(usage) = assistant_message(entry).and_then(|msg| msg.get("usage")) else {
        return;
    };

    if let Some(input_tokens) = usage.get("input_tokens").and_then(Value::as_u64) {
        *token_count = input_tokens;
    }
}

/// Updates the "awaiting user input" flag and its accompanying text from a
/// single transcript entry.
///
/// * A `"user"` entry clears both values.
/// * An assistant message containing a `tool_use` block clears both values.
/// * An assistant message with non-blank text sets the flag to whether its
///   `stop_reason` is `"end_turn"`, and stores the message text only when the
///   flag is set.
/// * Anything else leaves the state as it was.
fn update_awaiting_user_state(
    entry: &Value,
    awaiting_user_input: &mut bool,
    awaiting_user_text: &mut Option<String>,
) {
    // Work out the next state first (`None` means "leave unchanged"), then
    // apply it in one place.
    let next_state: Option<(bool, Option<String>)> = if entry_type(entry) == "user" {
        Some((false, None))
    } else {
        match assistant_message(entry) {
            None => None,
            Some(msg) if assistant_has_tool_use(msg) => Some((false, None)),
            Some(msg) if assistant_has_text(msg) => {
                let turn_ended = assistant_turn_ended(msg) == Some(true);
                let text = if turn_ended { assistant_text(msg) } else { None };
                Some((turn_ended, text))
            }
            Some(_) => None,
        }
    };

    if let Some((awaiting, text)) = next_state {
        *awaiting_user_input = awaiting;
        *awaiting_user_text = text;
    }
}

/// Collects one action per recognised content block of an assistant entry.
///
/// Returns an empty vector when `entry` is not an assistant message or its
/// `"content"` is not an array.
fn assistant_actions(entry: &Value, options: &ExtractOptions, ts: &Option<String>) -> Vec<Action> {
    let Some(blocks) = assistant_message(entry)
        .and_then(|msg| msg.get("content"))
        .and_then(Value::as_array)
    else {
        return Vec::new();
    };

    blocks
        .iter()
        .filter_map(|block| block_action(block, options, ts))
        .collect()
}

/// Returns the entry's message when the entry type is `"assistant"` and the
/// message's own `"role"` is `"assistant"` as well.
fn assistant_message(entry: &Value) -> Option<&Value> {
    if entry_type(entry) != "assistant" {
        return None;
    }

    let msg = message(entry)?;
    let role = msg.get("role").and_then(Value::as_str);
    (role == Some("assistant")).then_some(msg)
}

/// Reports whether the message's `"content"` array holds at least one block
/// whose `"type"` is `"tool_use"`.
fn assistant_has_tool_use(message: &Value) -> bool {
    let Some(blocks) = message.get("content").and_then(Value::as_array) else {
        return false;
    };

    blocks.iter().any(|block| {
        let kind = block.get("type").and_then(Value::as_str);
        kind == Some("tool_use")
    })
}

/// Reports whether the message's `"content"` array holds at least one
/// `"text"` block whose text is non-empty after trimming.
fn assistant_has_text(message: &Value) -> bool {
    let Some(blocks) = message.get("content").and_then(Value::as_array) else {
        return false;
    };

    blocks
        .iter()
        .filter(|block| block.get("type").and_then(Value::as_str) == Some("text"))
        .filter_map(|block| block.get("text").and_then(Value::as_str))
        .any(|text| !text.trim().is_empty())
}

/// Joins the trimmed, non-empty text of every `"text"` block in the message's
/// `"content"` array with single spaces.
///
/// Returns `None` when `"content"` is not an array or no text survives.
fn assistant_text(message: &Value) -> Option<String> {
    let blocks = message.get("content")?.as_array()?;

    let mut pieces: Vec<&str> = Vec::new();
    for block in blocks {
        if block.get("type").and_then(Value::as_str) != Some("text") {
            continue;
        }
        let Some(raw) = block.get("text").and_then(Value::as_str) else {
            continue;
        };
        let piece = raw.trim();
        if !piece.is_empty() {
            pieces.push(piece);
        }
    }

    let joined = pieces.join(" ");
    let joined = joined.trim();
    (!joined.is_empty()).then(|| joined.to_string())
}

fn assistant_turn_ended(message: &Value) -> Option<bool> {
    message
        .get("stop_reason")
        .and_then(Value::as_str)
        .map(|reason| reason == "end_turn")
}

/// Converts one content block into an action based on its `"type"`.
///
/// `"tool_use"` blocks always yield an action, `"text"` blocks yield one only
/// if `text_action` accepts them, and every other block yields `None`.
fn block_action(block: &Value, options: &ExtractOptions, ts: &Option<String>) -> Option<Action> {
    match block.get("type").and_then(Value::as_str) {
        Some("tool_use") => Some(tool_use_action(block, options, ts)),
        Some("text") => text_action(block, options, ts),
        _ => None,
    }
}

/// Builds a `tool_use` action from a content block.
///
/// The tool name comes from the block's `"name"` (falling back to
/// `"unknown"`); the detail is whatever `extract_tool_detail` produces for the
/// block's `"input"`, or `None` when there is no input.
fn tool_use_action(block: &Value, options: &ExtractOptions, ts: &Option<String>) -> Action {
    let tool = block
        .get("name")
        .and_then(Value::as_str)
        .unwrap_or("unknown")
        .to_string();
    let detail = block
        .get("input")
        .and_then(|input| extract_tool_detail(input, options));

    Action {
        tool,
        detail,
        kind: "tool_use".to_string(),
        ts: ts.clone(),
    }
}

/// Builds a `said` action from a block's `"text"`, ignoring short replies.
///
/// The text is trimmed first; it is kept only when it is longer than five
/// characters (counted as `char`s, not bytes). The detail is the text passed
/// through `truncate` with `options.max_detail_chars`.
fn text_action(block: &Value, options: &ExtractOptions, ts: &Option<String>) -> Option<Action> {
    // Replies at or below this many characters are dropped.
    const SHORT_REPLY_CHARS: usize = 5;

    let text = block.get("text")?.as_str()?.trim();
    if text.chars().count() <= SHORT_REPLY_CHARS {
        return None;
    }

    Some(Action {
        tool: "said".to_string(),
        detail: Some(truncate(text, options.max_detail_chars)),
        kind: "text".to_string(),
        ts: ts.clone(),
    })
}

/// Records a batch of actions and tracks the most recent tool invocation.
///
/// Every action is handed to `push_action` together with `max_actions`
/// (presumably the cap on `recent_actions` — confirm against `push_action`).
/// Each action whose `kind` is `"tool_use"` also replaces `current_tool`, so
/// after the call `current_tool` holds the last tool use seen so far.
fn record_actions(
    recent_actions: &mut Vec<Action>,
    current_tool: &mut Option<Action>,
    actions: Vec<Action>,
    max_actions: usize,
) {
    for action in actions {
        // Only tool uses need a second owned copy; everything else is moved
        // straight into the history without cloning.
        if action.kind == "tool_use" {
            *current_tool = Some(action.clone());
        }
        push_action(recent_actions, action, max_actions);
    }
}

/// Extracts the user's text from a message whose role is `"user"`.
///
/// `"content"` may be a plain string or an array of blocks; the string form is
/// tried first, then the first non-blank `"text"` block. Returns `None` for a
/// missing message, a non-user role, or content without usable text.
fn extract_user_text(message: Option<&Value>) -> Option<String> {
    let content = user_message(message).and_then(|msg| msg.get("content"))?;

    if let Some(text) = content_text(content) {
        return Some(text);
    }
    content_block_text(content)
}

/// Passes the message through only when its `"role"` is `"user"`.
fn user_message(message: Option<&Value>) -> Option<&Value> {
    let candidate = message?;
    let role = candidate.get("role").and_then(Value::as_str);
    (role == Some("user")).then_some(candidate)
}

/// Returns the trimmed text when `content` is a JSON string that is not
/// blank; otherwise `None`.
fn content_text(content: &Value) -> Option<String> {
    let text = content.as_str()?.trim();
    (!text.is_empty()).then(|| text.to_string())
}

/// Returns the text of the first block in a `content` array that
/// `text_block_content` accepts, or `None` when `content` is not an array or
/// no block qualifies.
fn content_block_text(content: &Value) -> Option<String> {
    let blocks = content.as_array()?;
    blocks.iter().find_map(text_block_content)
}

/// Returns the trimmed `"text"` of a block whose `"type"` is `"text"`,
/// provided the trimmed text is not empty.
fn text_block_content(block: &Value) -> Option<String> {
    if block.get("type").and_then(Value::as_str) != Some("text") {
        return None;
    }

    let text = block.get("text")?.as_str()?.trim();
    (!text.is_empty()).then(|| text.to_string())
}

#[cfg(test)]
mod tests {
    use std::fs;

    use super::*;
    use tempfile::NamedTempFile;

    /// Writes `contents` into a new temp file and returns the file handle so
    /// the caller can read its path.
    fn fixture(contents: &str) -> NamedTempFile {
        let file = NamedTempFile::new().expect("temp file");
        fs::write(file.path(), contents).expect("write fixture");
        file
    }

    /// Runs `parse` with default options over a transcript made of `contents`.
    fn parse_fixture(contents: &str) -> ParseSnapshot {
        let file = fixture(contents);
        parse(file.path(), &ExtractOptions::default()).expect("parse")
    }

    #[test]
    fn parse_claude_extracts_task_tool_and_tokens() {
        let snapshot = parse_fixture(concat!(
            "{\"type\":\"user\",\"message\":{\"role\":\"user\",\"content\":\"Summarize logs\"}}\n",
            "{\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"usage\":{\"input_tokens\":88},\"content\":[{\"type\":\"tool_use\",\"name\":\"read_file\",\"input\":{\"file_path\":\"/tmp/demo.txt\"}}]}}\n"
        ));

        assert_eq!(snapshot.user_task.as_deref(), Some("Summarize logs"));
        assert_eq!(snapshot.token_count, 88);

        // Exactly one action is recorded, and it is the tool call.
        let recorded_tools: Vec<&str> = snapshot
            .recent_actions
            .iter()
            .map(|action| action.tool.as_str())
            .collect();
        assert_eq!(recorded_tools, ["read_file"]);

        let current = snapshot.current_tool.as_ref().map(|a| a.tool.as_str());
        assert_eq!(current, Some("read_file"));
        assert!(!snapshot.awaiting_user_input);
    }

    #[test]
    fn parse_claude_marks_end_turn_text_as_awaiting_user() {
        let snapshot = parse_fixture(
            "{\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"stop_reason\":\"end_turn\",\"content\":[{\"type\":\"text\",\"text\":\"Which option do you want?\"}]}}\n",
        );

        assert!(snapshot.awaiting_user_input);
        assert_eq!(
            snapshot.awaiting_user_text.as_deref(),
            Some("Which option do you want?")
        );
    }

    #[test]
    fn parse_claude_does_not_mark_tool_use_turn_as_awaiting_user() {
        let snapshot = parse_fixture(
            "{\"type\":\"assistant\",\"message\":{\"role\":\"assistant\",\"stop_reason\":\"tool_use\",\"content\":[{\"type\":\"tool_use\",\"name\":\"Bash\",\"input\":{\"command\":\"pwd\"}}]}}\n",
        );

        assert!(!snapshot.awaiting_user_input);
        assert!(snapshot.awaiting_user_text.is_none());
    }

    #[test]
    fn extract_user_text_supports_string_and_blocks() {
        // Content given as a bare string.
        let from_string = serde_json::json!({
            "role": "user",
            "content": "  summarize logs  "
        });
        assert_eq!(
            extract_user_text(Some(&from_string)).as_deref(),
            Some("summarize logs")
        );

        // Content given as an array of typed blocks.
        let from_blocks = serde_json::json!({
            "role": "user",
            "content": [
                {"type": "text", "text": "  inspect parser  "}
            ]
        });
        assert_eq!(
            extract_user_text(Some(&from_blocks)).as_deref(),
            Some("inspect parser")
        );
    }

    #[test]
    fn extract_user_text_rejects_non_user_messages() {
        let from_assistant = serde_json::json!({
            "role": "assistant",
            "content": "ignored"
        });

        assert_eq!(extract_user_text(Some(&from_assistant)), None);
        assert_eq!(extract_user_text(None), None);
    }

    #[test]
    fn text_action_filters_short_replies_by_character_count() {
        // The floor exists to drop trivial chatter such as "ok" or "yes", so it
        // has to count characters. Counting bytes would admit "完成" (2 chars,
        // 6 bytes) while still dropping "Sorry" (5 chars, 5 bytes).
        let options = ExtractOptions::default();
        let ts: Option<String> = None;
        let action_for = |text: &str| {
            let block = serde_json::json!({"type": "text", "text": text});
            text_action(&block, &options, &ts)
        };

        assert!(
            action_for("完成").is_none(),
            "2-char CJK reply must be filtered like a 2-char ASCII reply"
        );
        assert!(
            action_for("完成しました今日は").is_some(),
            "9-char CJK reply must pass the >5 floor"
        );

        assert!(action_for("Sorry").is_none());
        assert!(action_for("Sorry, retrying").is_some());
    }
}