1use std::collections::HashMap;
2
3use base64::Engine;
4use codex_utils_image::load_and_resize_to_fit;
5use mcp_types::CallToolResult;
6use mcp_types::ContentBlock;
7use serde::Deserialize;
8use serde::Deserializer;
9use serde::Serialize;
10use serde::ser::Serializer;
11use ts_rs::TS;
12
13use crate::user_input::UserInput;
14use codex_git::GhostCommit;
15use codex_utils_image::error::ImageProcessingError;
16use schemars::JsonSchema;
17
/// Input-side items.
///
/// Serialized as an internally tagged object: the `type` field carries the
/// snake_case variant name (e.g. `function_call_output`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseInputItem {
    /// A message made of a role string and a list of content parts.
    Message {
        role: String,
        content: Vec<ContentItem>,
    },
    /// Output for the function call identified by `call_id`.
    FunctionCallOutput {
        call_id: String,
        output: FunctionCallOutputPayload,
    },
    /// Result of an MCP tool call: the tool's `CallToolResult` on success or
    /// an error string. `From<ResponseInputItem> for ResponseItem` folds this
    /// variant into `ResponseItem::FunctionCallOutput`.
    McpToolCallOutput {
        call_id: String,
        result: Result<CallToolResult, String>,
    },
    /// Plain-string output for the custom tool call identified by `call_id`.
    CustomToolCallOutput {
        call_id: String,
        output: String,
    },
}
38
/// One part of a message's content.
///
/// Internally tagged: `type` is `input_text`, `input_image` or `output_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentItem {
    /// Text on the input side.
    InputText { text: String },
    /// Image referenced by URL; the `From<Vec<UserInput>>` conversion in this
    /// file fills it with `data:` URLs for local images.
    InputImage { image_url: String },
    /// Text on the output side.
    OutputText { text: String },
}
46
/// Tagged union of response items.
///
/// Serialized with an internal `type` tag holding the snake_case variant
/// name. On deserialization, any unrecognised `type` value maps to
/// [`ResponseItem::Other`].
///
/// Several variants carry an `id` marked `#[serde(default, skip_serializing)]`
/// and `#[ts(skip)]`: it is accepted (and defaulted when absent) on input,
/// never written on output, and left out of the TypeScript bindings.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseItem {
    /// A message with a role and content parts.
    Message {
        #[serde(default, skip_serializing)]
        #[ts(skip)]
        id: Option<String>,
        role: String,
        content: Vec<ContentItem>,
    },
    /// A reasoning item.
    Reasoning {
        // Not optional: a missing `id` deserializes to the empty string.
        #[serde(default, skip_serializing)]
        #[ts(skip)]
        id: String,
        summary: Vec<ReasoningItemReasoningSummary>,
        // The field is omitted from output when the helper returns `true`,
        // i.e. when it is `Some(..)` with no `ReasoningText` entry (including
        // an empty list). `None` is NOT skipped and serializes as `null`.
        // NOTE(review): the helper's name reads inverted relative to
        // `skip_serializing_if` — confirm this is the intended behaviour.
        #[serde(default, skip_serializing_if = "should_serialize_reasoning_content")]
        #[ts(optional)]
        content: Option<Vec<ReasoningItemContent>>,
        encrypted_content: Option<String>,
    },
    /// A local shell invocation with its status and action.
    LocalShellCall {
        #[serde(default, skip_serializing)]
        #[ts(skip)]
        id: Option<String>,
        call_id: Option<String>,
        status: LocalShellStatus,
        action: LocalShellAction,
    },
    /// A function call: name, argument string and the call id that a later
    /// `FunctionCallOutput` refers to.
    FunctionCall {
        #[serde(default, skip_serializing)]
        #[ts(skip)]
        id: Option<String>,
        name: String,
        // Carried as an opaque string here; presumably JSON — confirm with callers.
        arguments: String,
        call_id: String,
    },
    /// Output for the function call identified by `call_id`.
    FunctionCallOutput {
        call_id: String,
        output: FunctionCallOutputPayload,
    },
    /// A custom tool call with a free-form string input.
    CustomToolCall {
        #[serde(default, skip_serializing)]
        #[ts(skip)]
        id: Option<String>,
        // Omitted from output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        status: Option<String>,

        call_id: String,
        name: String,
        input: String,
    },
    /// Plain-string output for the custom tool call identified by `call_id`.
    CustomToolCallOutput {
        call_id: String,
        output: String,
    },
    /// A web search call and the action it performed.
    WebSearchCall {
        #[serde(default, skip_serializing)]
        #[ts(skip)]
        id: Option<String>,
        // Omitted from output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        status: Option<String>,
        action: WebSearchAction,
    },
    /// Wraps a `codex_git::GhostCommit`.
    GhostSnapshot {
        ghost_commit: GhostCommit,
    },
    /// Carries an opaque `encrypted_content` string.
    CompactionSummary {
        encrypted_content: String,
    },
    /// Catch-all for unrecognised `type` tags during deserialization.
    #[serde(other)]
    Other,
}
141
142fn should_serialize_reasoning_content(content: &Option<Vec<ReasoningItemContent>>) -> bool {
143 match content {
144 Some(content) => !content
145 .iter()
146 .any(|c| matches!(c, ReasoningItemContent::ReasoningText { .. })),
147 None => false,
148 }
149}
150
151fn local_image_error_placeholder(
152 path: &std::path::Path,
153 error: impl std::fmt::Display,
154) -> ContentItem {
155 ContentItem::InputText {
156 text: format!(
157 "Codex could not read the local image at `{}`: {}",
158 path.display(),
159 error
160 ),
161 }
162}
163
164fn invalid_image_error_placeholder(
165 path: &std::path::Path,
166 error: impl std::fmt::Display,
167) -> ContentItem {
168 ContentItem::InputText {
169 text: format!(
170 "Image located at `{}` is invalid: {}",
171 path.display(),
172 error
173 ),
174 }
175}
176
177impl From<ResponseInputItem> for ResponseItem {
178 fn from(item: ResponseInputItem) -> Self {
179 match item {
180 ResponseInputItem::Message { role, content } => Self::Message {
181 role,
182 content,
183 id: None,
184 },
185 ResponseInputItem::FunctionCallOutput { call_id, output } => {
186 Self::FunctionCallOutput { call_id, output }
187 }
188 ResponseInputItem::McpToolCallOutput { call_id, result } => {
189 let output = match result {
190 Ok(result) => FunctionCallOutputPayload::from(&result),
191 Err(tool_call_err) => FunctionCallOutputPayload {
192 content: format!("err: {tool_call_err:?}"),
193 success: Some(false),
194 ..Default::default()
195 },
196 };
197 Self::FunctionCallOutput { call_id, output }
198 }
199 ResponseInputItem::CustomToolCallOutput { call_id, output } => {
200 Self::CustomToolCallOutput { call_id, output }
201 }
202 }
203 }
204}
205
/// Status of a local shell call.
///
/// Serialized as the snake_case strings `completed`, `in_progress` and
/// `incomplete`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum LocalShellStatus {
    Completed,
    InProgress,
    Incomplete,
}
213
/// Action carried by a local shell call.
///
/// Internally tagged on `type`; the only variant, `Exec`, is tagged `exec`
/// and wraps a [`LocalShellExecAction`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum LocalShellAction {
    Exec(LocalShellExecAction),
}
219
/// Parameters of an `exec` local shell action.
///
/// Only `command` is mandatory; every other field is an `Option`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
pub struct LocalShellExecAction {
    /// Command as a list of strings (presumably argv-style — confirm with the executor).
    pub command: Vec<String>,
    /// Optional timeout; the field name indicates milliseconds.
    pub timeout_ms: Option<u64>,
    /// Optional working directory.
    pub working_directory: Option<String>,
    /// Optional map of environment variable names to values.
    pub env: Option<HashMap<String, String>>,
    /// Optional user name.
    pub user: Option<String>,
}
228
/// Action performed by a web search call.
///
/// Internally tagged on `type` (`search`, `open_page`, `find_in_page`).
/// Every payload field is optional: it defaults to `None` when absent on
/// input and is omitted from output when `None`. Unknown `type` values
/// deserialize to [`WebSearchAction::Other`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum WebSearchAction {
    /// A search, optionally carrying the query string.
    Search {
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        query: Option<String>,
    },
    /// Opening a page, optionally carrying its URL.
    OpenPage {
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        url: Option<String>,
    },
    /// Finding a pattern in a page; both the URL and the pattern are optional.
    FindInPage {
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        url: Option<String>,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        pattern: Option<String>,
    },

    /// Catch-all for unrecognised `type` tags during deserialization.
    #[serde(other)]
    Other,
}
254
/// One entry of a reasoning item's `summary` list.
///
/// Internally tagged on `type`; the only variant is `summary_text`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ReasoningItemReasoningSummary {
    SummaryText { text: String },
}
260
/// One entry of a reasoning item's `content` list.
///
/// Internally tagged on `type` (`reasoning_text` or `text`). The presence of
/// a `ReasoningText` entry is what `should_serialize_reasoning_content`
/// checks for.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ReasoningItemContent {
    ReasoningText { text: String },
    Text { text: String },
}
267
268impl From<Vec<UserInput>> for ResponseInputItem {
269 fn from(items: Vec<UserInput>) -> Self {
270 Self::Message {
271 role: "user".to_string(),
272 content: items
273 .into_iter()
274 .map(|c| match c {
275 UserInput::Text { text } => ContentItem::InputText { text },
276 UserInput::Image { image_url } => ContentItem::InputImage { image_url },
277 UserInput::LocalImage { path } => match load_and_resize_to_fit(&path) {
278 Ok(image) => ContentItem::InputImage {
279 image_url: image.into_data_url(),
280 },
281 Err(err) => {
282 if matches!(&err, ImageProcessingError::Read { .. }) {
283 local_image_error_placeholder(&path, &err)
284 } else if err.is_invalid_image() {
285 invalid_image_error_placeholder(&path, &err)
286 } else {
287 match std::fs::read(&path) {
288 Ok(bytes) => {
289 let Some(mime_guess) = mime_guess::from_path(&path).first()
290 else {
291 return local_image_error_placeholder(
292 &path,
293 "unsupported MIME type (unknown)",
294 );
295 };
296 let mime = mime_guess.essence_str().to_owned();
297 if !mime.starts_with("image/") {
298 return local_image_error_placeholder(
299 &path,
300 format!("unsupported MIME type `{mime}`"),
301 );
302 }
303 let encoded =
304 base64::engine::general_purpose::STANDARD.encode(bytes);
305 ContentItem::InputImage {
306 image_url: format!("data:{mime};base64,{encoded}"),
307 }
308 }
309 Err(read_err) => {
310 tracing::warn!(
311 "Skipping image {} – could not read file: {}",
312 path.display(),
313 read_err
314 );
315 local_image_error_placeholder(&path, &read_err)
316 }
317 }
318 }
319 }
320 },
321 })
322 .collect::<Vec<ContentItem>>(),
323 }
324 }
325}
326
/// Parameters of a shell tool call whose command is a list of strings.
///
/// NOTE(review): only `Deserialize` is derived, so the `skip_serializing_if`
/// attributes below are not used by any serde serializer for this type; they
/// may still be read by the schema derive — confirm before removing them.
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
pub struct ShellToolCallParams {
    /// Command as a list of strings.
    pub command: Vec<String>,
    /// Optional working directory.
    pub workdir: Option<String>,

    /// Optional timeout; the field name indicates milliseconds. The key
    /// `timeout` is accepted as an alias on input.
    #[serde(alias = "timeout")]
    pub timeout_ms: Option<u64>,
    /// Optional flag requesting escalated permissions.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub with_escalated_permissions: Option<bool>,
    /// Optional free-form justification string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub justification: Option<String>,
}
342
/// Parameters of a shell tool call whose command is a single string.
///
/// Identical to `ShellToolCallParams` except for the type of `command`.
///
/// NOTE(review): only `Deserialize` is derived, so the `skip_serializing_if`
/// attributes below are not used by any serde serializer for this type; they
/// may still be read by the schema derive — confirm before removing them.
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
pub struct ShellCommandToolCallParams {
    /// Command as one string.
    pub command: String,
    /// Optional working directory.
    pub workdir: Option<String>,

    /// Optional timeout; the field name indicates milliseconds. The key
    /// `timeout` is accepted as an alias on input.
    #[serde(alias = "timeout")]
    pub timeout_ms: Option<u64>,
    /// Optional flag requesting escalated permissions.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub with_escalated_permissions: Option<bool>,
    /// Optional free-form justification string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub justification: Option<String>,
}
358
/// One element of a structured function-call output.
///
/// Internally tagged on `type` (`input_text` or `input_image`). A list of
/// these is what `FunctionCallOutputPayload` serializes when its
/// `content_items` is set.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum FunctionCallOutputContentItem {
    /// A text element.
    InputText { text: String },
    /// An image element referenced by URL.
    InputImage { image_url: String },
}
369
/// Payload of a function-call output.
///
/// `Serialize` and `Deserialize` are implemented by hand elsewhere in this
/// file rather than derived: the wire form is either a bare string
/// (`content`) or an array of content items (`content_items`). `success` is
/// written by neither form and is always `None` after deserialization.
#[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)]
pub struct FunctionCallOutputPayload {
    /// Textual form of the output; also what `Display` and `Deref` expose.
    pub content: String,
    /// Structured form of the output; when present it is serialized instead
    /// of `content`.
    // NOTE(review): serde derives are not used on this struct, so this
    // attribute only matters to other derives that read `serde` attributes.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_items: Option<Vec<FunctionCallOutputContentItem>>,
    /// Success flag kept in memory only.
    pub success: Option<bool>,
}
385
/// Private wire shape used when deserializing `FunctionCallOutputPayload`.
///
/// Untagged: serde tries the variants in declaration order, so a JSON string
/// becomes `Text` and a JSON array of content items becomes `Items`.
#[derive(Deserialize)]
#[serde(untagged)]
enum FunctionCallOutputPayloadSerde {
    Text(String),
    Items(Vec<FunctionCallOutputContentItem>),
}
392
393impl Serialize for FunctionCallOutputPayload {
397 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
398 where
399 S: Serializer,
400 {
401 tracing::debug!("Function call output payload: {:?}", self);
402 if let Some(items) = &self.content_items {
403 items.serialize(serializer)
404 } else {
405 serializer.serialize_str(&self.content)
406 }
407 }
408}
409
410impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
411 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
412 where
413 D: Deserializer<'de>,
414 {
415 match FunctionCallOutputPayloadSerde::deserialize(deserializer)? {
416 FunctionCallOutputPayloadSerde::Text(content) => Ok(FunctionCallOutputPayload {
417 content,
418 ..Default::default()
419 }),
420 FunctionCallOutputPayloadSerde::Items(items) => {
421 let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?;
422 Ok(FunctionCallOutputPayload {
423 content,
424 content_items: Some(items),
425 success: None,
426 })
427 }
428 }
429 }
430}
431
432impl From<&CallToolResult> for FunctionCallOutputPayload {
433 fn from(call_tool_result: &CallToolResult) -> Self {
434 let CallToolResult {
435 content,
436 structured_content,
437 is_error,
438 } = call_tool_result;
439
440 let is_success = is_error != &Some(true);
441
442 if let Some(structured_content) = structured_content
443 && !structured_content.is_null()
444 {
445 match serde_json::to_string(structured_content) {
446 Ok(serialized_structured_content) => {
447 return FunctionCallOutputPayload {
448 content: serialized_structured_content,
449 success: Some(is_success),
450 ..Default::default()
451 };
452 }
453 Err(err) => {
454 return FunctionCallOutputPayload {
455 content: err.to_string(),
456 success: Some(false),
457 ..Default::default()
458 };
459 }
460 }
461 }
462
463 let serialized_content = match serde_json::to_string(content) {
464 Ok(serialized_content) => serialized_content,
465 Err(err) => {
466 return FunctionCallOutputPayload {
467 content: err.to_string(),
468 success: Some(false),
469 ..Default::default()
470 };
471 }
472 };
473
474 let content_items = convert_content_blocks_to_items(content);
475
476 FunctionCallOutputPayload {
477 content: serialized_content,
478 content_items,
479 success: Some(is_success),
480 }
481 }
482}
483
484fn convert_content_blocks_to_items(
485 blocks: &[ContentBlock],
486) -> Option<Vec<FunctionCallOutputContentItem>> {
487 let mut saw_image = false;
488 let mut items = Vec::with_capacity(blocks.len());
489 tracing::warn!("Blocks: {:?}", blocks);
490 for block in blocks {
491 match block {
492 ContentBlock::TextContent(text) => {
493 items.push(FunctionCallOutputContentItem::InputText {
494 text: text.text.clone(),
495 });
496 }
497 ContentBlock::ImageContent(image) => {
498 saw_image = true;
499 let image_url = if image.data.starts_with("data:") {
501 image.data.clone()
502 } else {
503 format!("data:{};base64,{}", image.mime_type, image.data)
504 };
505 items.push(FunctionCallOutputContentItem::InputImage { image_url });
506 }
507 _ => return None,
509 }
510 }
511
512 if saw_image { Some(items) } else { None }
513}
514
515impl std::fmt::Display for FunctionCallOutputPayload {
520 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
521 f.write_str(&self.content)
522 }
523}
524
525impl std::ops::Deref for FunctionCallOutputPayload {
526 type Target = str;
527 fn deref(&self) -> &Self::Target {
528 &self.content
529 }
530}
531
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;
    use mcp_types::ImageContent;
    use mcp_types::TextContent;
    use tempfile::tempdir;

    /// A payload without content items serializes `output` as a bare string.
    #[test]
    fn serializes_success_as_plain_string() -> Result<()> {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "ok".into(),
                ..Default::default()
            },
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
        Ok(())
    }

    /// `success: Some(false)` does not change the wire form: still a bare string.
    #[test]
    fn serializes_failure_as_string() -> Result<()> {
        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: FunctionCallOutputPayload {
                content: "bad".into(),
                success: Some(false),
                ..Default::default()
            },
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
        Ok(())
    }

    /// A tool result containing an image yields content items and serializes
    /// `output` as an array.
    #[test]
    fn serializes_image_outputs_as_array() -> Result<()> {
        let call_tool_result = CallToolResult {
            content: vec![
                ContentBlock::TextContent(TextContent {
                    annotations: None,
                    text: "caption".into(),
                    r#type: "text".into(),
                }),
                ContentBlock::ImageContent(ImageContent {
                    annotations: None,
                    data: "BASE64".into(),
                    mime_type: "image/png".into(),
                    r#type: "image".into(),
                }),
            ],
            is_error: None,
            structured_content: None,
        };

        let payload = FunctionCallOutputPayload::from(&call_tool_result);
        assert_eq!(payload.success, Some(true));
        let items = payload.content_items.clone().expect("content items");
        assert_eq!(
            items,
            vec![
                FunctionCallOutputContentItem::InputText {
                    text: "caption".into(),
                },
                // Raw base64 data is wrapped into a data URL using the block's
                // MIME type; the expectation must match that exact form.
                FunctionCallOutputContentItem::InputImage {
                    image_url: "data:image/png;base64,BASE64".into(),
                },
            ]
        );

        let item = ResponseInputItem::FunctionCallOutput {
            call_id: "call1".into(),
            output: payload,
        };

        let json = serde_json::to_string(&item)?;
        let v: serde_json::Value = serde_json::from_str(&json)?;

        let output = v.get("output").expect("output field");
        assert!(output.is_array(), "expected array output");
        // The serialized array is the content items themselves.
        assert_eq!(output, &serde_json::to_value(&items)?);

        Ok(())
    }

    /// An array payload is kept as content items, with `content` holding the
    /// same array re-encoded as JSON and `success` left unset.
    #[test]
    fn deserializes_array_payload_into_items() -> Result<()> {
        let json = r#"[
            {"type": "input_text", "text": "note"},
            {"type": "input_image", "image_url": ""}
        ]"#;

        let payload: FunctionCallOutputPayload = serde_json::from_str(json)?;

        assert_eq!(payload.success, None);
        let expected_items = vec![
            FunctionCallOutputContentItem::InputText {
                text: "note".into(),
            },
            FunctionCallOutputContentItem::InputImage {
                image_url: "".into(),
            },
        ];
        assert_eq!(payload.content_items, Some(expected_items.clone()));

        let expected_content = serde_json::to_string(&expected_items)?;
        assert_eq!(payload.content, expected_content);

        Ok(())
    }

    /// Each web search action parses into the expected variant and serializes
    /// back to the same JSON value.
    #[test]
    fn roundtrips_web_search_call_actions() -> Result<()> {
        let cases = vec![
            (
                r#"{
                    "type": "web_search_call",
                    "status": "completed",
                    "action": {
                        "type": "search",
                        "query": "weather seattle"
                    }
                }"#,
                WebSearchAction::Search {
                    query: Some("weather seattle".into()),
                },
                Some("completed".into()),
            ),
            (
                r#"{
                    "type": "web_search_call",
                    "status": "open",
                    "action": {
                        "type": "open_page",
                        "url": "https://example.com"
                    }
                }"#,
                WebSearchAction::OpenPage {
                    url: Some("https://example.com".into()),
                },
                Some("open".into()),
            ),
            (
                r#"{
                    "type": "web_search_call",
                    "status": "in_progress",
                    "action": {
                        "type": "find_in_page",
                        "url": "https://example.com/docs",
                        "pattern": "installation"
                    }
                }"#,
                WebSearchAction::FindInPage {
                    url: Some("https://example.com/docs".into()),
                    pattern: Some("installation".into()),
                },
                Some("in_progress".into()),
            ),
        ];

        for (json_literal, expected_action, expected_status) in cases {
            let parsed: ResponseItem = serde_json::from_str(json_literal)?;
            let expected = ResponseItem::WebSearchCall {
                id: None,
                status: expected_status.clone(),
                action: expected_action.clone(),
            };
            assert_eq!(parsed, expected);

            let serialized = serde_json::to_value(&parsed)?;
            let original_value: serde_json::Value = serde_json::from_str(json_literal)?;
            assert_eq!(serialized, original_value);
        }

        Ok(())
    }

    /// The `timeout` alias populates `timeout_ms`; absent optionals are `None`.
    #[test]
    fn deserialize_shell_tool_call_params() -> Result<()> {
        let json = r#"{
            "command": ["ls", "-l"],
            "workdir": "/tmp",
            "timeout": 1000
        }"#;

        let params: ShellToolCallParams = serde_json::from_str(json)?;
        assert_eq!(
            ShellToolCallParams {
                command: vec!["ls".to_string(), "-l".to_string()],
                workdir: Some("/tmp".to_string()),
                timeout_ms: Some(1000),
                with_escalated_permissions: None,
                justification: None,
            },
            params
        );
        Ok(())
    }

    /// A missing local image becomes a text placeholder naming the path.
    #[test]
    fn local_image_read_error_adds_placeholder() -> Result<()> {
        let dir = tempdir()?;
        let missing_path = dir.path().join("missing-image.png");

        let item = ResponseInputItem::from(vec![UserInput::LocalImage {
            path: missing_path.clone(),
        }]);

        match item {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentItem::InputText { text } => {
                        let display_path = missing_path.display().to_string();
                        assert!(
                            text.contains(&display_path),
                            "placeholder should mention missing path: {text}"
                        );
                        assert!(
                            text.contains("could not read"),
                            "placeholder should mention read issue: {text}"
                        );
                    }
                    other => panic!("expected placeholder text but found {other:?}"),
                }
            }
            other => panic!("expected message response but got {other:?}"),
        }

        Ok(())
    }

    /// A readable non-image file becomes a placeholder naming its MIME type.
    #[test]
    fn local_image_non_image_adds_placeholder() -> Result<()> {
        let dir = tempdir()?;
        let json_path = dir.path().join("example.json");
        std::fs::write(&json_path, br#"{"hello":"world"}"#)?;

        let item = ResponseInputItem::from(vec![UserInput::LocalImage {
            path: json_path.clone(),
        }]);

        match item {
            ResponseInputItem::Message { content, .. } => {
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentItem::InputText { text } => {
                        assert!(
                            text.contains("unsupported MIME type `application/json`"),
                            "placeholder should mention unsupported MIME: {text}"
                        );
                        assert!(
                            text.contains(&json_path.display().to_string()),
                            "placeholder should mention path: {text}"
                        );
                    }
                    other => panic!("expected placeholder text but found {other:?}"),
                }
            }
            other => panic!("expected message response but got {other:?}"),
        }

        Ok(())
    }
}