// agtrace_providers/gemini/parser.rs

1use crate::Result;
2use agtrace_types::*;
3use chrono::DateTime;
4use std::path::Path;
5use uuid::Uuid;
6
7use crate::builder::{EventBuilder, SemanticSuffix};
8use crate::gemini::schema::{GeminiMessage, GeminiSession};
9
10/// Normalize Gemini session to events
11/// Unfolds nested structure (thoughts[], toolCalls[]) into event stream
12pub(crate) fn normalize_gemini_session(
13    session: &GeminiSession,
14    raw_messages: Vec<serde_json::Value>,
15) -> Vec<AgentEvent> {
16    // Create session_id UUID from session_id string (deterministic)
17    let session_id_uuid = Uuid::new_v5(&Uuid::NAMESPACE_OID, session.session_id.as_bytes());
18    let mut builder = EventBuilder::new(session_id_uuid);
19    let mut events = Vec::new();
20
21    for (idx, msg) in session.messages.iter().enumerate() {
22        let raw_value = raw_messages
23            .get(idx)
24            .cloned()
25            .unwrap_or(serde_json::Value::Null);
26
27        match msg {
28            GeminiMessage::User(user_msg) => {
29                // Skip numeric IDs (legacy CLI events)
30                if user_msg.id.parse::<u32>().is_ok() {
31                    continue;
32                }
33
34                let timestamp = parse_timestamp(&user_msg.timestamp);
35                builder.build_and_push(
36                    &mut events,
37                    &user_msg.id,
38                    SemanticSuffix::User,
39                    timestamp,
40                    EventPayload::User(UserPayload {
41                        text: user_msg.content.clone(),
42                    }),
43                    Some(raw_value),
44                    StreamId::Main,
45                );
46            }
47
48            GeminiMessage::Gemini(gemini_msg) => {
49                let timestamp = parse_timestamp(&gemini_msg.timestamp);
50                let base_id = &gemini_msg.id;
51
52                // 1. Reasoning events (thoughts)
53                for (idx, thought) in gemini_msg.thoughts.iter().enumerate() {
54                    let indexed_base_id = format!("{}-thought-{}", base_id, idx);
55                    builder.build_and_push(
56                        &mut events,
57                        &indexed_base_id,
58                        SemanticSuffix::Reasoning,
59                        timestamp,
60                        EventPayload::Reasoning(ReasoningPayload {
61                            text: format!("{}: {}", thought.subject, thought.description),
62                        }),
63                        Some(raw_value.clone()),
64                        StreamId::Main,
65                    );
66                }
67
68                // 2. Tool calls and results
69                for (idx, tool_call) in gemini_msg.tool_calls.iter().enumerate() {
70                    let indexed_base_id = format!("{}-tool-{}", base_id, idx);
71
72                    // ToolCall event
73                    let tool_call_uuid = builder.build_and_push(
74                        &mut events,
75                        &indexed_base_id,
76                        SemanticSuffix::ToolCall,
77                        timestamp,
78                        EventPayload::ToolCall(super::mapper::normalize_gemini_tool_call(
79                            tool_call.name.clone(),
80                            tool_call.args.clone(),
81                            Some(tool_call.id.clone()),
82                            tool_call.display_name.clone(),
83                        )),
84                        Some(raw_value.clone()),
85                        StreamId::Main,
86                    );
87
88                    // Register tool call ID mapping (provider ID -> UUID)
89                    builder.register_tool_call(tool_call.id.clone(), tool_call_uuid);
90
91                    // ToolResult event (if result exists)
92                    if !tool_call.result.is_empty() {
93                        let output = tool_call
94                            .result_display
95                            .clone()
96                            .unwrap_or_else(|| format!("{:?}", tool_call.result));
97
98                        let is_error = tool_call
99                            .status
100                            .as_ref()
101                            .map(|s| s == "error")
102                            .unwrap_or(false);
103
104                        builder.build_and_push(
105                            &mut events,
106                            &indexed_base_id,
107                            SemanticSuffix::ToolResult,
108                            timestamp,
109                            EventPayload::ToolResult(ToolResultPayload {
110                                output,
111                                tool_call_id: tool_call_uuid, // Reference to ToolCall UUID
112                                is_error,
113                                agent_id: None,
114                            }),
115                            Some(raw_value.clone()),
116                            StreamId::Main,
117                        );
118                    }
119                }
120
121                // 3. Message event (assistant response)
122                builder.build_and_push(
123                    &mut events,
124                    base_id,
125                    SemanticSuffix::Message,
126                    timestamp,
127                    EventPayload::Message(MessagePayload {
128                        text: gemini_msg.content.clone(),
129                    }),
130                    Some(raw_value.clone()),
131                    StreamId::Main,
132                );
133
134                // 4. TokenUsage event (sidecar attached to message)
135                // Gemini returns turn-level totals, so we attach to the last generation event
136                //
137                // Gemini Token Conversion Rationale:
138                //
139                // Input mapping (verified from gemini-cli telemetry):
140                //   cached   = cached (cached content tokens, from prompt caching)
141                //   uncached = input (fresh input tokens, not from cache)
142                //   Note: Gemini's schema guarantees: prompt = cached + input
143                //
144                // Output mapping (verified from gemini-cli schema):
145                //   generated = output (normal text generation)
146                //   reasoning = thoughts (thinking/reasoning tokens)
147                //   tool      = tool (tool call tokens)
148                //   Note: All three fields are explicit in Gemini's TokenUsage type
149                builder.build_and_push(
150                    &mut events,
151                    base_id,
152                    SemanticSuffix::TokenUsage,
153                    timestamp,
154                    EventPayload::TokenUsage(TokenUsagePayload::new(
155                        TokenInput::new(
156                            gemini_msg.tokens.cached as u64,
157                            gemini_msg.tokens.input as u64,
158                        ),
159                        TokenOutput::new(
160                            gemini_msg.tokens.output as u64,
161                            gemini_msg.tokens.thoughts as u64,
162                            gemini_msg.tokens.tool as u64,
163                        ),
164                    )),
165                    Some(raw_value),
166                    StreamId::Main,
167                );
168            }
169
170            GeminiMessage::Info(info_msg) => {
171                let timestamp = parse_timestamp(&info_msg.timestamp);
172                builder.build_and_push(
173                    &mut events,
174                    &info_msg.id,
175                    SemanticSuffix::Notification,
176                    timestamp,
177                    EventPayload::Notification(NotificationPayload {
178                        text: info_msg.content.clone(),
179                        level: Some("info".to_string()),
180                    }),
181                    Some(raw_value),
182                    StreamId::Main,
183                );
184            }
185        }
186    }
187
188    events
189}
190
191/// Parse Gemini timestamp to DateTime<Utc>
192fn parse_timestamp(ts: &str) -> DateTime<chrono::Utc> {
193    DateTime::parse_from_rfc3339(ts)
194        .map(|dt| dt.with_timezone(&chrono::Utc))
195        .unwrap_or_else(|_| chrono::Utc::now())
196}
197
198/// Gemini session parser implementation
199pub struct GeminiParser;
200
201impl crate::traits::SessionParser for GeminiParser {
202    fn parse_file(&self, path: &Path) -> Result<Vec<AgentEvent>> {
203        super::io::normalize_gemini_file(path)
204    }
205
206    fn parse_record(&self, content: &str) -> Result<Option<AgentEvent>> {
207        // Gemini uses JSON format (not JSONL), parse as AgentEvent
208        match serde_json::from_str::<AgentEvent>(content) {
209            Ok(event) => Ok(Some(event)),
210            Err(_) => Ok(None), // Skip malformed lines
211        }
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::gemini::schema::{GeminiAssistantMessage, TokenUsage, UserMessage};

    /// Timestamp used for the session-level fields of every fixture.
    const SESSION_TS: &str = "2024-01-01T00:00:00Z";

    /// Build a minimal session that wraps the given messages.
    fn session_with(messages: Vec<GeminiMessage>) -> GeminiSession {
        GeminiSession {
            session_id: "test-session".to_string(),
            project_hash: agtrace_types::ProjectHash::from("test-hash"),
            start_time: SESSION_TS.to_string(),
            last_updated: SESSION_TS.to_string(),
            messages,
        }
    }

    /// A single user message becomes exactly one parentless User event.
    #[test]
    fn test_normalize_user_message() {
        let user = UserMessage {
            id: "uuid-123".to_string(),
            timestamp: "2024-01-01T00:00:00Z".to_string(),
            content: "Hello".to_string(),
        };
        let session = session_with(vec![GeminiMessage::User(user)]);

        let events = normalize_gemini_session(&session, vec![]);
        assert_eq!(events.len(), 1);

        let first = &events[0];
        if let EventPayload::User(payload) = &first.payload {
            assert_eq!(payload.text, "Hello");
        } else {
            panic!("Expected User payload");
        }
        assert_eq!(first.parent_id, None);
    }

    /// An assistant message without thoughts or tool calls yields a Message
    /// event followed by a TokenUsage event carrying the mapped counts.
    #[test]
    fn test_normalize_assistant_with_tokens() {
        // Input total  = cached + input            = 2 + 8     = 10
        // Output total = output + thoughts + tool  = 4 + 1 + 0 = 5
        let tokens = TokenUsage {
            input: 8,  // uncached input tokens
            output: 4, // normal text generation tokens
            total: 15,
            cached: 2, // cached input tokens
            thoughts: 1,
            tool: 0,
        };
        let assistant = GeminiAssistantMessage {
            id: "uuid-456".to_string(),
            timestamp: "2024-01-01T00:00:01Z".to_string(),
            content: "Hello back!".to_string(),
            model: "gemini-2.0-flash".to_string(),
            thoughts: vec![],
            tool_calls: vec![],
            tokens,
        };
        let session = session_with(vec![GeminiMessage::Gemini(assistant)]);

        let events = normalize_gemini_session(&session, vec![]);
        // Expect exactly two events: Message, then TokenUsage.
        assert_eq!(events.len(), 2);

        if let EventPayload::Message(payload) = &events[0].payload {
            assert_eq!(payload.text, "Hello back!");
        } else {
            panic!("Expected Message payload");
        }

        if let EventPayload::TokenUsage(payload) = &events[1].payload {
            assert_eq!(payload.input.total(), 10);
            assert_eq!(payload.output.total(), 5);
            assert_eq!(payload.total_tokens(), 15);
            assert_eq!(payload.input.cached, 2);
            assert_eq!(payload.output.reasoning, 1);
        } else {
            panic!("Expected TokenUsage payload");
        }
    }
}