1use std::collections::HashMap;
12
13use chrono::{DateTime, Utc};
14use serde::Deserialize;
15
16use crate::model::{Event, EventBuilder, EventType};
17use crate::util::truncate_string;
18
19#[derive(Debug, Clone)]
21pub struct FunctionCallInfo {
22 pub name: String,
24 pub arguments: String,
26 pub requires_approval: bool,
28 pub timestamp: Option<DateTime<Utc>>,
30}
31
32pub type FunctionCallMap = HashMap<String, FunctionCallInfo>;
34
35#[derive(Debug, Clone, Deserialize)]
39pub struct CodexSessionEntry {
40 pub timestamp: String,
42
43 #[serde(rename = "type")]
45 pub entry_type: String,
46
47 pub payload: serde_json::Value,
49}
50
51#[derive(Debug, Clone, Deserialize)]
53pub struct SessionMeta {
54 pub id: String,
56 pub timestamp: String,
58 pub cwd: Option<String>,
60 pub cli_version: Option<String>,
62 pub model_provider: Option<String>,
64 pub git: Option<GitInfo>,
66}
67
68#[derive(Debug, Clone, Deserialize)]
70pub struct GitInfo {
71 pub commit_hash: Option<String>,
72 pub branch: Option<String>,
73 pub repository_url: Option<String>,
74}
75
76#[derive(Debug, Clone, Deserialize)]
78pub struct TokenUsageInfo {
79 pub total_token_usage: Option<TokenUsage>,
80 pub last_token_usage: Option<TokenUsage>,
81 pub model_context_window: Option<i64>,
82}
83
84#[derive(Debug, Clone, Deserialize)]
86pub struct TokenUsage {
87 pub input_tokens: Option<i64>,
88 pub cached_input_tokens: Option<i64>,
89 pub output_tokens: Option<i64>,
90 pub reasoning_output_tokens: Option<i64>,
91 pub total_tokens: Option<i64>,
92}
93
94#[derive(Debug, Clone, Deserialize)]
96pub struct TurnContext {
97 pub cwd: Option<String>,
98 pub model: Option<String>,
99 pub approval_policy: Option<String>,
100}
101
102impl CodexSessionEntry {
103 fn parse_timestamp(&self) -> Option<DateTime<Utc>> {
105 chrono::DateTime::parse_from_rfc3339(&self.timestamp)
106 .ok()
107 .map(|dt| dt.with_timezone(&Utc))
108 }
109
110 pub fn into_events(
117 self,
118 machine_id: &str,
119 session_id: &str,
120 function_call_map: &mut FunctionCallMap,
121 session_meta: &Option<SessionMeta>,
122 turn_model: &mut Option<String>,
123 ) -> Vec<Event> {
124 let timestamp = self.parse_timestamp();
125
126 match self.entry_type.as_str() {
127 "session_meta" => {
128 vec![]
131 }
132
133 "turn_context" => {
134 if let Ok(ctx) = serde_json::from_value::<TurnContext>(self.payload) {
136 *turn_model = ctx.model;
137 }
138 vec![]
139 }
140
141 "event_msg" => {
142 self.parse_event_msg(machine_id, session_id, timestamp, session_meta, turn_model)
143 }
144
145 "response_item" => self.parse_response_item(
146 machine_id,
147 session_id,
148 timestamp,
149 function_call_map,
150 session_meta,
151 turn_model,
152 ),
153
154 _ => vec![],
155 }
156 }
157
158 fn parse_event_msg(
160 self,
161 machine_id: &str,
162 session_id: &str,
163 timestamp: Option<DateTime<Utc>>,
164 session_meta: &Option<SessionMeta>,
165 turn_model: &Option<String>,
166 ) -> Vec<Event> {
167 let msg_type = self
168 .payload
169 .get("type")
170 .and_then(|v| v.as_str())
171 .unwrap_or("");
172
173 match msg_type {
174 "token_count" => {
175 let info = self.payload.get("info");
177 if info.is_none() || info.is_some_and(|v| v.is_null()) {
178 return vec![];
179 }
180
181 let info: TokenUsageInfo =
182 match serde_json::from_value(info.cloned().unwrap_or_default()) {
183 Ok(i) => i,
184 Err(_) => return vec![],
185 };
186
187 let usage = match info.last_token_usage {
189 Some(u) => u,
190 None => return vec![],
191 };
192
193 if usage.input_tokens.unwrap_or(0) == 0 && usage.output_tokens.unwrap_or(0) == 0 {
195 return vec![];
196 }
197
198 let mut builder = EventBuilder::new(machine_id, EventType::ApiRequest, session_id)
199 .source("codex_session")
200 .framework("codex")
201 .timestamp_opt(timestamp);
202
203 let input = usage.input_tokens.unwrap_or(0);
205 let output = usage.output_tokens.unwrap_or(0);
206 builder = builder.tokens(input, output);
207
208 if let Some(cache_read) = usage.cached_input_tokens {
210 builder = builder.tokens_cache_read_opt(Some(cache_read));
211 }
212
213 builder = builder.model_opt(turn_model.clone());
215
216 if let Some(meta) = session_meta {
218 builder = builder.cwd_opt(meta.cwd.clone());
219 if let Some(ref git) = meta.git {
220 builder = builder.git_branch_opt(git.branch.clone());
221 }
222 }
223
224 let mut extra = serde_json::Map::new();
226 if let Some(reasoning) = usage.reasoning_output_tokens {
227 extra.insert(
228 "reasoning_output_tokens".into(),
229 serde_json::json!(reasoning),
230 );
231 }
232 if let Some(ctx_window) = info.model_context_window {
233 extra.insert("model_context_window".into(), serde_json::json!(ctx_window));
234 }
235 if !extra.is_empty() {
236 builder = builder.metadata(serde_json::Value::Object(extra).to_string());
237 }
238
239 vec![builder.build()]
240 }
241
242 "user_message" => {
243 let message = self
245 .payload
246 .get("message")
247 .and_then(|v| v.as_str())
248 .unwrap_or("");
249
250 if message.is_empty() {
251 return vec![];
252 }
253
254 let mut builder =
255 EventBuilder::new(machine_id, EventType::UserPromptSubmit, session_id)
256 .source("codex_session")
257 .framework("codex")
258 .timestamp_opt(timestamp);
259
260 if let Some(meta) = session_meta {
262 builder = builder.cwd_opt(meta.cwd.clone());
263 if let Some(ref git) = meta.git {
264 builder = builder.git_branch_opt(git.branch.clone());
265 }
266 }
267
268 let truncated = truncate_string(message, 1000);
270 builder = builder.payload(serde_json::json!({"prompt": truncated}).to_string());
271
272 vec![builder.build()]
273 }
274
275 _ => vec![],
278 }
279 }
280
281 fn parse_response_item(
283 self,
284 machine_id: &str,
285 session_id: &str,
286 timestamp: Option<DateTime<Utc>>,
287 function_call_map: &mut FunctionCallMap,
288 session_meta: &Option<SessionMeta>,
289 turn_model: &Option<String>,
290 ) -> Vec<Event> {
291 let item_type = self
292 .payload
293 .get("type")
294 .and_then(|v| v.as_str())
295 .unwrap_or("");
296
297 match item_type {
298 "function_call" => {
299 let name = self
300 .payload
301 .get("name")
302 .and_then(|v| v.as_str())
303 .unwrap_or("")
304 .to_string();
305 let call_id = self
306 .payload
307 .get("call_id")
308 .and_then(|v| v.as_str())
309 .unwrap_or("")
310 .to_string();
311 let arguments = self
312 .payload
313 .get("arguments")
314 .and_then(|v| v.as_str())
315 .unwrap_or("")
316 .to_string();
317
318 if call_id.is_empty() {
319 return vec![];
320 }
321
322 let requires_approval = serde_json::from_str::<serde_json::Value>(&arguments)
325 .ok()
326 .and_then(|args| {
327 args.get("sandbox_permissions")
328 .and_then(|v| v.as_str())
329 .map(|s| s == "require_escalated")
330 })
331 .unwrap_or(false);
332
333 function_call_map.insert(
335 call_id.clone(),
336 FunctionCallInfo {
337 name: name.clone(),
338 arguments: arguments.clone(),
339 requires_approval,
340 timestamp,
341 },
342 );
343
344 let mut builder = EventBuilder::new(machine_id, EventType::PreToolUse, session_id)
346 .source("codex_session")
347 .framework("codex")
348 .timestamp_opt(timestamp)
349 .tool(call_id, name)
350 .model_opt(turn_model.clone());
351
352 if let Some(meta) = session_meta {
353 builder = builder.cwd_opt(meta.cwd.clone());
354 if let Some(ref git) = meta.git {
355 builder = builder.git_branch_opt(git.branch.clone());
356 }
357 }
358
359 vec![builder.build()]
360 }
361
362 "function_call_output" => {
363 let call_id = self
364 .payload
365 .get("call_id")
366 .and_then(|v| v.as_str())
367 .unwrap_or("")
368 .to_string();
369 let output = self
370 .payload
371 .get("output")
372 .and_then(|v| v.as_str())
373 .unwrap_or("");
374
375 if call_id.is_empty() {
376 return vec![];
377 }
378
379 let mut builder = EventBuilder::new(machine_id, EventType::PostToolUse, session_id)
381 .source("codex_session")
382 .framework("codex")
383 .timestamp_opt(timestamp)
384 .tool_use_id(call_id.clone())
385 .model_opt(turn_model.clone());
386
387 if let Some(call_info) = function_call_map.get(&call_id) {
389 builder = builder.tool_name(call_info.name.clone());
390
391 if call_info.name == "shell_command" {
393 if let Ok(args) =
395 serde_json::from_str::<serde_json::Value>(&call_info.arguments)
396 && let Some(cmd) = args.get("command")
397 {
398 builder =
399 builder.payload(serde_json::json!({ "command": cmd }).to_string());
400 }
401
402 let truncated_output = truncate_string(output, 500);
404 builder = builder.metadata(
405 serde_json::json!({ "output": truncated_output }).to_string(),
406 );
407 }
408 }
409
410 if let Some(meta) = session_meta {
411 builder = builder.cwd_opt(meta.cwd.clone());
412 if let Some(ref git) = meta.git {
413 builder = builder.git_branch_opt(git.branch.clone());
414 }
415 }
416
417 vec![builder.build()]
418 }
419
420 _ => vec![],
422 }
423 }
424
425 pub fn dedup_key(&self) -> String {
429 format!("{}:{}", self.timestamp, self.entry_type)
430 }
431}
432
/// Extracts the session UUID that terminates a session file name.
///
/// The name must end in `.jsonl`, and the five `-`-separated groups directly
/// before the extension must form a canonical UUID: groups of 8, 4, 4, 4 and
/// 12 ASCII hex digits. Anything before those groups (for example
/// `rollout-2026-01-08T17-00-06-`) is ignored.
///
/// Returns `None` when the extension is wrong, there are fewer than five
/// groups, or the trailing groups are not a well-formed UUID.
pub fn extract_session_id_from_filename(filename: &str) -> Option<String> {
    /// Lengths of the five groups of a canonical UUID, left to right.
    const GROUP_LENS: [usize; 5] = [8, 4, 4, 4, 12];

    let stem = filename.strip_suffix(".jsonl")?;

    // Walk the name from the right so that dashes in the prefix do not
    // matter; bail out when fewer than five groups exist.
    let mut from_right = stem.rsplit('-');
    let mut groups = [""; 5];
    for slot in groups.iter_mut().rev() {
        *slot = from_right.next()?;
    }

    // Checking only the total length would accept names such as
    // `...-aaaaaaaaaaaaaaaaaaaaaaaaaaaa-b-c-d-e`, so each group is validated.
    let well_formed = groups
        .iter()
        .zip(GROUP_LENS.iter())
        .all(|(group, len)| group.len() == *len && group.bytes().all(|b| b.is_ascii_hexdigit()));

    well_formed.then(|| groups.join("-"))
}
457
458pub fn parse_session_meta(entry: &CodexSessionEntry) -> Option<SessionMeta> {
460 if entry.entry_type != "session_meta" {
461 return None;
462 }
463 serde_json::from_value(entry.payload.clone()).ok()
464}
465
#[cfg(test)]
mod tests {
    use super::*;

    /// Machine id handed to every `into_events` call in this module.
    const MACHINE_ID: &str = "machine-1";
    /// Session id handed to every `into_events` call in this module.
    const SESSION_ID: &str = "session-1";

    /// Assembles a `CodexSessionEntry` from its three raw parts.
    fn make_entry(
        timestamp: &str,
        entry_type: &str,
        payload: serde_json::Value,
    ) -> CodexSessionEntry {
        CodexSessionEntry {
            timestamp: timestamp.to_string(),
            entry_type: entry_type.to_string(),
            payload,
        }
    }

    /// Converts `entry` into events using the fixed machine and session ids.
    fn convert(
        entry: CodexSessionEntry,
        call_map: &mut FunctionCallMap,
        session_meta: &Option<SessionMeta>,
        turn_model: &mut Option<String>,
    ) -> Vec<Event> {
        entry.into_events(MACHINE_ID, SESSION_ID, call_map, session_meta, turn_model)
    }

    #[test]
    fn test_extract_session_id_from_filename() {
        let cases = [
            (
                "rollout-2026-01-08T17-00-06-019ba044-90fa-7b30-be9c-6b7b601599cd.jsonl",
                Some("019ba044-90fa-7b30-be9c-6b7b601599cd"),
            ),
            (
                "rollout-2025-11-26T17-55-56-019ac306-3951-72d1-8b5d-447c33ce17f5.jsonl",
                Some("019ac306-3951-72d1-8b5d-447c33ce17f5"),
            ),
            ("invalid.jsonl", None),
            ("rollout.jsonl", None),
        ];

        for (filename, expected) in cases {
            assert_eq!(
                extract_session_id_from_filename(filename).as_deref(),
                expected,
                "filename: {filename}"
            );
        }
    }

    #[test]
    fn test_parse_session_meta() {
        let entry = make_entry(
            "2025-11-27T01:55:56.451Z",
            "session_meta",
            serde_json::json!({
                "id": "019ac306-3951-72d1-8b5d-447c33ce17f5",
                "timestamp": "2025-11-27T01:55:56.369Z",
                "cwd": "/Users/storm/repos/test",
                "cli_version": "0.63.0",
                "model_provider": "openai",
                "git": {
                    "commit_hash": "abc123",
                    "branch": "main",
                    "repository_url": "git@github.com:test/repo.git"
                }
            }),
        );

        let meta = parse_session_meta(&entry).unwrap();

        assert_eq!(meta.id, "019ac306-3951-72d1-8b5d-447c33ce17f5");
        assert_eq!(meta.cwd.as_deref(), Some("/Users/storm/repos/test"));
        assert_eq!(meta.cli_version.as_deref(), Some("0.63.0"));
        assert_eq!(meta.git.as_ref().unwrap().branch.as_deref(), Some("main"));
    }

    #[test]
    fn test_parse_token_count_event() {
        let entry = make_entry(
            "2025-11-27T01:56:10.186Z",
            "event_msg",
            serde_json::json!({
                "type": "token_count",
                "info": {
                    "total_token_usage": {
                        "input_tokens": 3597,
                        "cached_input_tokens": 3072,
                        "output_tokens": 33,
                        "reasoning_output_tokens": 0,
                        "total_tokens": 3630
                    },
                    "last_token_usage": {
                        "input_tokens": 500,
                        "cached_input_tokens": 400,
                        "output_tokens": 33,
                        "reasoning_output_tokens": 10,
                        "total_tokens": 533
                    },
                    "model_context_window": 258400
                }
            }),
        );

        let session_meta = Some(SessionMeta {
            id: "test".to_string(),
            timestamp: "2025-11-27T01:55:56.369Z".to_string(),
            cwd: Some("/test/cwd".to_string()),
            cli_version: None,
            model_provider: None,
            git: Some(GitInfo {
                commit_hash: None,
                branch: Some("main".to_string()),
                repository_url: None,
            }),
        });
        let mut call_map = FunctionCallMap::default();
        let mut turn_model = Some("gpt-5.1-codex".to_string());

        let events = convert(entry, &mut call_map, &session_meta, &mut turn_model);

        assert_eq!(events.len(), 1);
        let event = &events[0];
        assert_eq!(event.event_type, EventType::ApiRequest);
        // The per-request (`last_token_usage`) numbers are reported, not the totals.
        assert_eq!(event.tokens_input, Some(500));
        assert_eq!(event.tokens_output, Some(33));
        assert_eq!(event.tokens_cache_read, Some(400));
        assert_eq!(event.model.as_deref(), Some("gpt-5.1-codex"));
        assert_eq!(event.cwd.as_deref(), Some("/test/cwd"));
        assert_eq!(event.git_branch.as_deref(), Some("main"));

        let metadata: serde_json::Value =
            serde_json::from_str(event.metadata.as_ref().unwrap()).unwrap();
        assert_eq!(metadata["reasoning_output_tokens"], 10);
        assert_eq!(metadata["model_context_window"], 258400);
    }

    #[test]
    fn test_parse_user_message_event() {
        let entry = make_entry(
            "2025-11-27T01:56:07.612Z",
            "event_msg",
            serde_json::json!({
                "type": "user_message",
                "message": "Hello, world!",
                "images": []
            }),
        );
        let mut call_map = FunctionCallMap::default();
        let mut turn_model = None;

        let events = convert(entry, &mut call_map, &None, &mut turn_model);

        assert_eq!(events.len(), 1);
        let event = &events[0];
        assert_eq!(event.event_type, EventType::UserPromptSubmit);

        let payload: serde_json::Value =
            serde_json::from_str(event.payload.as_ref().unwrap()).unwrap();
        assert_eq!(payload["prompt"], "Hello, world!");
    }

    #[test]
    fn test_parse_function_call_and_output() {
        let call_entry = make_entry(
            "2025-11-27T01:56:10.186Z",
            "response_item",
            serde_json::json!({
                "type": "function_call",
                "name": "shell_command",
                "arguments": "{\"command\":\"ls\",\"workdir\":\"/test\"}",
                "call_id": "call_123"
            }),
        );
        let output_entry = make_entry(
            "2025-11-27T01:56:10.300Z",
            "response_item",
            serde_json::json!({
                "type": "function_call_output",
                "call_id": "call_123",
                "output": "file1.txt\nfile2.txt"
            }),
        );
        let mut call_map = FunctionCallMap::default();
        let mut turn_model = None;

        // The call must be processed first so the output can be matched to it.
        let call_events = convert(call_entry, &mut call_map, &None, &mut turn_model);
        assert_eq!(call_events.len(), 1);
        let call_event = &call_events[0];
        assert_eq!(call_event.event_type, EventType::PreToolUse);
        assert_eq!(call_event.tool_use_id.as_deref(), Some("call_123"));
        assert_eq!(call_event.tool_name.as_deref(), Some("shell_command"));

        let output_events = convert(output_entry, &mut call_map, &None, &mut turn_model);
        assert_eq!(output_events.len(), 1);
        let output_event = &output_events[0];
        assert_eq!(output_event.event_type, EventType::PostToolUse);
        assert_eq!(output_event.tool_use_id.as_deref(), Some("call_123"));
        assert_eq!(output_event.tool_name.as_deref(), Some("shell_command"));

        let payload: serde_json::Value =
            serde_json::from_str(output_event.payload.as_ref().unwrap()).unwrap();
        assert_eq!(payload["command"], "ls");

        let metadata: serde_json::Value =
            serde_json::from_str(output_event.metadata.as_ref().unwrap()).unwrap();
        assert_eq!(metadata["output"], "file1.txt\nfile2.txt");
    }

    #[test]
    fn test_turn_context_updates_model() {
        let ctx_entry = make_entry(
            "2025-11-27T01:56:07.612Z",
            "turn_context",
            serde_json::json!({
                "cwd": "/test",
                "model": "gpt-5.2-codex",
                "approval_policy": "on-request"
            }),
        );
        let mut call_map = FunctionCallMap::default();
        let mut turn_model = None;

        let events = convert(ctx_entry, &mut call_map, &None, &mut turn_model);

        assert!(events.is_empty());
        assert_eq!(turn_model.as_deref(), Some("gpt-5.2-codex"));
    }

    #[test]
    fn test_skip_null_token_info() {
        let entry = make_entry(
            "2025-11-27T01:56:08.167Z",
            "event_msg",
            serde_json::json!({
                "type": "token_count",
                "info": null,
                "rate_limits": {}
            }),
        );
        let mut call_map = FunctionCallMap::default();
        let mut turn_model = None;

        let events = convert(entry, &mut call_map, &None, &mut turn_model);

        assert!(events.is_empty());
    }

    #[test]
    fn test_dedup_key() {
        let entry = make_entry("2025-11-27T01:56:10.186Z", "event_msg", serde_json::json!({}));

        assert_eq!(entry.dedup_key(), "2025-11-27T01:56:10.186Z:event_msg");
    }
}