// normalize_chat_sessions/session.rs

//! Unified session types for format-agnostic session representation.
//!
//! These types represent parsed session data in a normalized format,
//! allowing consumers to work with sessions regardless of their source
//! format (Claude Code, Gemini CLI, Codex, etc.).
6
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
9
10/// A parsed session in unified format.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
13pub struct Session {
14    /// Path to the original session file.
15    pub path: PathBuf,
16    /// Name of the format that parsed this session.
17    pub format: String,
18    /// Session metadata (IDs, timestamps, provider info).
19    pub metadata: SessionMetadata,
20    /// Conversation turns (request/response pairs).
21    pub turns: Vec<Turn>,
22    /// Parent session ID (set when this session is a subagent).
23    #[serde(default, skip_serializing_if = "Option::is_none")]
24    pub parent_id: Option<String>,
25    /// Agent ID for subagent sessions (e.g. "agent-a5c5ccc9c2b61e757").
26    #[serde(default, skip_serializing_if = "Option::is_none")]
27    pub agent_id: Option<String>,
28    /// Subagent type (e.g. "general-purpose", "Explore", "Plan").
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub subagent_type: Option<String>,
31}
32
33/// Session metadata extracted from the log.
34#[derive(Debug, Clone, Default, Serialize, Deserialize)]
35#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
36pub struct SessionMetadata {
37    /// Session identifier (format-specific).
38    pub session_id: Option<String>,
39    /// Session start timestamp.
40    pub timestamp: Option<String>,
41    /// LLM provider (e.g., "anthropic", "google", "openai").
42    pub provider: Option<String>,
43    /// Model identifier.
44    pub model: Option<String>,
45    /// Project path or context.
46    pub project: Option<String>,
47}
48
49/// A single turn in the conversation (typically one user message + assistant response).
50#[derive(Debug, Clone, Default, Serialize, Deserialize)]
51#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
52pub struct Turn {
53    /// Messages in this turn.
54    pub messages: Vec<Message>,
55    /// Token usage for this turn (if available).
56    pub token_usage: Option<TokenUsage>,
57}
58
59/// A message from a participant.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
62pub struct Message {
63    /// Who sent this message.
64    pub role: Role,
65    /// Message content blocks.
66    pub content: Vec<ContentBlock>,
67    /// Timestamp of this message (if available).
68    pub timestamp: Option<String>,
69}
70
71/// Message sender role.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
73#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
74#[serde(rename_all = "lowercase")]
75pub enum Role {
76    User,
77    Assistant,
78    System,
79    /// Tool result message (content returned to the model after a tool call).
80    /// Structurally sent as a "user" role in some formats, but semantically distinct.
81    Tool,
82}
83
84/// A content block within a message.
85#[derive(Debug, Clone, Serialize, Deserialize)]
86#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
87#[serde(tag = "type", rename_all = "snake_case")]
88pub enum ContentBlock {
89    /// Plain text content.
90    Text { text: String },
91    /// Tool invocation by the assistant.
92    ToolUse {
93        id: String,
94        name: String,
95        input: serde_json::Value,
96    },
97    /// Result of a tool invocation.
98    ToolResult {
99        tool_use_id: String,
100        content: String,
101        is_error: bool,
102    },
103    /// Thinking/reasoning content (e.g., Claude's extended thinking).
104    Thinking { text: String },
105}
106
107/// Token usage for an API call.
108#[derive(Debug, Clone, Default, Serialize, Deserialize)]
109#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
110pub struct TokenUsage {
111    /// Input tokens (prompt).
112    pub input: u64,
113    /// Output tokens (completion).
114    pub output: u64,
115    /// Tokens read from cache.
116    pub cache_read: Option<u64>,
117    /// Tokens written to cache.
118    pub cache_create: Option<u64>,
119    /// Model used for this API call (if known per-turn).
120    #[serde(default, skip_serializing_if = "Option::is_none")]
121    pub model: Option<String>,
122}
123
124impl Session {
125    /// Create a new empty session.
126    pub fn new(path: PathBuf, format: impl Into<String>) -> Self {
127        Self {
128            path,
129            format: format.into(),
130            metadata: SessionMetadata::default(),
131            turns: Vec::new(),
132            parent_id: None,
133            agent_id: None,
134            subagent_type: None,
135        }
136    }
137
138    /// Whether this session is a subagent (has a parent session).
139    pub fn is_subagent(&self) -> bool {
140        self.parent_id.is_some()
141    }
142
143    /// Total number of messages across all turns.
144    pub fn message_count(&self) -> usize {
145        self.turns.iter().map(|t| t.messages.len()).sum()
146    }
147
148    /// Count messages by role.
149    pub fn messages_by_role(&self, role: Role) -> usize {
150        self.turns
151            .iter()
152            .flat_map(|t| &t.messages)
153            .filter(|m| m.role == role)
154            .count()
155    }
156
157    /// Iterate over all tool use blocks.
158    pub fn tool_uses(&self) -> impl Iterator<Item = (&str, &serde_json::Value)> {
159        self.turns.iter().flat_map(|t| &t.messages).flat_map(|m| {
160            m.content.iter().filter_map(|block| match block {
161                ContentBlock::ToolUse { name, input, .. } => Some((name.as_str(), input)),
162                _ => None,
163            })
164        })
165    }
166
167    /// Iterate over all tool results.
168    pub fn tool_results(&self) -> impl Iterator<Item = (&str, bool)> {
169        self.turns.iter().flat_map(|t| &t.messages).flat_map(|m| {
170            m.content.iter().filter_map(|block| match block {
171                ContentBlock::ToolResult {
172                    content, is_error, ..
173                } => Some((content.as_str(), *is_error)),
174                _ => None,
175            })
176        })
177    }
178
179    /// Total token usage across all turns.
180    pub fn total_tokens(&self) -> TokenUsage {
181        let mut total = TokenUsage::default();
182        for turn in &self.turns {
183            if let Some(usage) = &turn.token_usage {
184                total.input += usage.input;
185                total.output += usage.output;
186                if let Some(cache_read) = usage.cache_read {
187                    *total.cache_read.get_or_insert(0) += cache_read;
188                }
189                if let Some(cache_create) = usage.cache_create {
190                    *total.cache_create.get_or_insert(0) += cache_create;
191                }
192            }
193        }
194        total
195    }
196}
197
198impl std::fmt::Display for Role {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        match self {
201            Role::User => write!(f, "user"),
202            Role::Assistant => write!(f, "assistant"),
203            Role::System => write!(f, "system"),
204            Role::Tool => write!(f, "tool"),
205        }
206    }
207}