Skip to main content

objectiveai_sdk/mcp/tool/
content_block.rs

1//! MCP content block enum.
2//!
3//! A content block is the union of all content types that can appear in
4//! prompts, tool results, and sampling messages.
5//!
6//! # Round-trip with [`RichContentPart`]
7//!
8//! `From<RichContentPart> for ContentBlock` always produces one of
9//! [`ContentBlock::Text`], [`ContentBlock::Image`], or
10//! [`ContentBlock::Audio`] — never `EmbeddedResource` or
11//! `ResourceLink`. Variants that have no native MCP carrier
12//! (`InputVideo`, `VideoUrl`, `File`) and remote-URL `ImageUrl`s
13//! land as `Text` blocks; the original variant is encoded in
14//! `_meta` markers so the reverse `From<ContentBlock>` arm rebuilds
15//! the identical [`RichContentPart`].
16//!
17//! Round-trip property: for every `RichContentPart` value `p`,
18//! `RichContentPart::from(ContentBlock::from(p.clone())) == p` —
19//! with three documented exceptions:
20//!
21//! 1. **`File` multi-field collapse.** When two or more of
22//!    `file_data`, `file_url`, `file_id` are set on the same
23//!    `File`, the forward conversion picks one by precedence
24//!    (`file_data` > `file_url` > `file_id`) and the others are
25//!    dropped. `filename` rides through losslessly via the
26//!    `objectiveai/filename` meta marker.
27//! 2. **`RichContentPart::Text` containing a base64 data URL.**
28//!    `RichContentPart::Text { text: "data:image/png;base64,..." }`
29//!    forward-converts to `ContentBlock::Text(t)` with the same
30//!    body and no `kind` marker; the reverse arm spots the
31//!    data-URL shape and returns a media variant (`ImageUrl`,
32//!    `InputAudio`, etc.) rather than `Text`. This is intentional —
33//!    a Text payload that happens to be a well-formed data URL is
34//!    treated as media on every other entry point (the
35//!    `From<TextContent>` arm in the SDK does the same thing), and
36//!    splitting the behaviour here would be more surprising than
37//!    the round-trip loss.
38//! 3. **`RichContentPart::VideoUrl` collapses to `InputVideo`.** The
39//!    forward path treats `InputVideo` and `VideoUrl` as the same
40//!    carrier (data URL → unmarked `Text`, remote URL → `Text` with
41//!    `kind = "input_video_remote"`), so the reverse arm always
42//!    rebuilds an `InputVideo` regardless of which variant the
43//!    forward call started with. Intentional: the distinction was
44//!    only meaningful for the legacy double-encoding, and unifying
45//!    the two halves the marker surface area.
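//!
//! Outside the numbered list there is one degenerate case: a `File`
//! with none of `file_data`, `file_url`, or `file_id` set
//! forward-converts to an empty `Text` block with no markers (any
//! `filename` is dropped too), so the reverse arm lands it as a
//! `Text` part rather than a `File`.
//!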
47//! ## `_meta` markers
48//!
49//! Three keys, all namespaced under `objectiveai/`:
50//!
51//! - **`objectiveai/kind`** (Text carrier only) — enum string,
52//!   tells the reverse arm the Text block is the encoded form of a
53//!   non-Text variant. Values:
54//!   - `"image_url_remote"`: body is a remote URL for an
55//!     [`RichContentPart::ImageUrl`].
56//!   - `"input_video_remote"`: body is a remote URL for an
57//!     [`RichContentPart::InputVideo`]. Also emitted for remote
58//!     [`RichContentPart::VideoUrl`] — the two video variants share
59//!     a forward path and both round-trip to `InputVideo`.
60//!   - `"file_url"`: body is a remote URL for a
61//!     [`RichContentPart::File`] whose primary field is `file_url`.
62//!   - `"file_id"`: body is an opaque ID for a
63//!     [`RichContentPart::File`] whose primary field is `file_id`.
64//! - **`objectiveai/image_detail`** (Image carrier, or Text with
65//!   `kind: "image_url_remote"`) — preserves
66//!   [`ImageUrl::detail`](crate::agent::completions::message::ImageUrl::detail).
67//! - **`objectiveai/filename`** (any carrier representing a
68//!   [`RichContentPart::File`]) — preserves
69//!   [`File::filename`](crate::agent::completions::message::File::filename).
70
71use schemars::JsonSchema;
72use serde::{Deserialize, Serialize};
73
74/// `_meta` key namespacing all round-trip markers, so we don't
75/// collide with any official MCP metadata convention.
76const META_KIND: &str = "objectiveai/kind";
77const META_IMAGE_DETAIL: &str = "objectiveai/image_detail";
78const META_FILENAME: &str = "objectiveai/filename";
79
80/// `objectiveai/kind` enum tag values.
81const KIND_IMAGE_URL_REMOTE: &str = "image_url_remote";
82const KIND_INPUT_VIDEO_REMOTE: &str = "input_video_remote";
83const KIND_FILE_URL: &str = "file_url";
84const KIND_FILE_ID: &str = "file_id";
85
86/// A content block that can be used in prompts and tool results.
87#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
88#[serde(tag = "type")]
89#[schemars(rename = "mcp.tool.ContentBlock")]
90pub enum ContentBlock {
91    /// Text content.
92    #[serde(rename = "text")]
93    #[schemars(title = "Text")]
94    Text(super::TextContent),
95    /// Image content (base64-encoded).
96    #[serde(rename = "image")]
97    #[schemars(title = "Image")]
98    Image(super::ImageContent),
99    /// Audio content (base64-encoded).
100    #[serde(rename = "audio")]
101    #[schemars(title = "Audio")]
102    Audio(super::AudioContent),
103    /// A resource link.
104    #[serde(rename = "resource_link")]
105    #[schemars(title = "ResourceLink")]
106    ResourceLink(super::ResourceLink),
107    /// An embedded resource.
108    #[serde(rename = "resource")]
109    #[schemars(title = "EmbeddedResource")]
110    EmbeddedResource(super::EmbeddedResource),
111}
112
113/// Convert a single `RichContentPart` into a `ContentBlock`.
114///
115/// Forward path produces only `Text`, `Image`, and `Audio` carriers
116/// — never `EmbeddedResource` or `ResourceLink`. See the
117/// module-level docs for the round-trip property and the `_meta`
118/// marker catalogue.
119///
120/// Upstream-specific converters (`claude_agent_sdk`, `codex_sdk`)
121/// use stricter `TryFrom` impls that reject unsupported parts. This
122/// `From` is the generic, round-trip-preserving path used by
123/// `agent/completions/notify` and the MCP tool-response formatter.
124impl From<crate::agent::completions::message::RichContentPart>
125    for ContentBlock
126{
127    fn from(part: crate::agent::completions::message::RichContentPart) -> Self {
128        use crate::agent::completions::message::RichContentPart;
129        match part {
130            RichContentPart::Text { text } => {
131                ContentBlock::Text(super::TextContent {
132                    text,
133                    annotations: None,
134                    _meta: None,
135                })
136            }
137            RichContentPart::ImageUrl { image_url } => image_url.into(),
138            RichContentPart::InputAudio { input_audio } => input_audio.into(),
139            // Both video variants share a forward path; reverse always
140            // rebuilds `InputVideo`. See the third round-trip exception
141            // in the module-level docs.
142            RichContentPart::InputVideo { video_url } => video_url.into(),
143            RichContentPart::VideoUrl { video_url } => video_url.into(),
144            RichContentPart::File { file } => file.into(),
145        }
146    }
147}
148
149/// Direct conversion from a typed `ImageUrl` to a `ContentBlock`.
150/// Same body as the `RichContentPart::ImageUrl` arm of
151/// [`From<RichContentPart> for ContentBlock`] — kept independent so
152/// per-leaf `CommandResponse::into_mcp` impls (whose `Response` is
153/// already an `ImageUrl`) can call `image_url.into()` without first
154/// wrapping in `RichContentPart`.
155impl From<crate::agent::completions::message::ImageUrl> for ContentBlock {
156    fn from(image_url: crate::agent::completions::message::ImageUrl) -> Self {
157        // Serialize detail (an enum) once via serde_json so
158        // we hand markers a Value::String("auto"|"low"|"high"),
159        // not the typed enum literal.
160        let detail_value = image_url
161            .detail
162            .as_ref()
163            .and_then(|d| serde_json::to_value(d).ok());
164        match super::ImageContent::try_from(image_url) {
165            Ok(mut ic) => {
166                // Data-URL path: lossless Image carrier.
167                // Detail (when present) rides in _meta.
168                if let Some(v) = detail_value {
169                    let mut m = indexmap::IndexMap::new();
170                    m.insert(META_IMAGE_DETAIL.to_string(), v);
171                    ic._meta = Some(m);
172                }
173                ContentBlock::Image(ic)
174            }
175            Err(err) => {
176                // Remote URL: stash on Text with kind marker
177                // so reverse can rebuild ImageUrl.
178                let mut meta = indexmap::IndexMap::new();
179                meta.insert(
180                    META_KIND.to_string(),
181                    serde_json::Value::String(
182                        KIND_IMAGE_URL_REMOTE.to_string(),
183                    ),
184                );
185                if let Some(v) = detail_value {
186                    meta.insert(META_IMAGE_DETAIL.to_string(), v);
187                }
188                ContentBlock::Text(super::TextContent {
189                    text: err.url,
190                    annotations: None,
191                    _meta: Some(meta),
192                })
193            }
194        }
195    }
196}
197
198/// Direct conversion from a typed `InputAudio` to a `ContentBlock`.
199/// Same body as the `RichContentPart::InputAudio` arm of
200/// [`From<RichContentPart> for ContentBlock`].
201impl From<crate::agent::completions::message::InputAudio> for ContentBlock {
202    fn from(input_audio: crate::agent::completions::message::InputAudio) -> Self {
203        ContentBlock::Audio(input_audio.into())
204    }
205}
206
207/// Direct conversion from a typed `VideoUrl` to a `ContentBlock`.
208/// Shared forward path for both `RichContentPart::InputVideo` and
209/// `RichContentPart::VideoUrl`: data-URL videos round-trip via the
210/// default reverse heuristic (parse_data_url → video/* mime →
211/// InputVideo), so no marker. Remote URLs get
212/// `META_KIND = "input_video_remote"` so the reverse rebuilds an
213/// `InputVideo`. See the third round-trip exception in the
214/// module-level docs.
215impl From<crate::agent::completions::message::VideoUrl> for ContentBlock {
216    fn from(video_url: crate::agent::completions::message::VideoUrl) -> Self {
217        if crate::data_url::parse_data_url(&video_url.url).is_some() {
218            ContentBlock::Text(super::TextContent {
219                text: video_url.url,
220                annotations: None,
221                _meta: None,
222            })
223        } else {
224            ContentBlock::Text(super::TextContent {
225                text: video_url.url,
226                annotations: None,
227                _meta: Some(single_meta(
228                    META_KIND,
229                    KIND_INPUT_VIDEO_REMOTE.to_string(),
230                )),
231            })
232        }
233    }
234}
235
236/// Direct conversion from a typed `File` to a `ContentBlock`.
237/// Multi-field collapse:
238/// `file_data` > `file_url` > `file_id` by precedence. Lower-priority
239/// fields are dropped; `filename` rides through via `_meta`.
240impl From<crate::agent::completions::message::File> for ContentBlock {
241    fn from(file: crate::agent::completions::message::File) -> Self {
242        let filename = file.filename.clone();
243        if let Some(blob) = file.file_data {
244            // Encode as a Text(data:application/octet-stream;base64,...)
245            // — the heuristic reverse decodes it into a File. Filename
246            // rides in _meta.
247            let body = format!("data:application/octet-stream;base64,{blob}");
248            let meta = filename.map(|n| single_meta(META_FILENAME, n));
249            ContentBlock::Text(super::TextContent {
250                text: body,
251                annotations: None,
252                _meta: meta,
253            })
254        } else if let Some(url) = file.file_url {
255            let mut m = single_meta(META_KIND, KIND_FILE_URL.to_string());
256            if let Some(n) = filename {
257                m.insert(META_FILENAME.to_string(), serde_json::Value::String(n));
258            }
259            ContentBlock::Text(super::TextContent {
260                text: url,
261                annotations: None,
262                _meta: Some(m),
263            })
264        } else if let Some(id) = file.file_id {
265            let mut m = single_meta(META_KIND, KIND_FILE_ID.to_string());
266            if let Some(n) = filename {
267                m.insert(META_FILENAME.to_string(), serde_json::Value::String(n));
268            }
269            ContentBlock::Text(super::TextContent {
270                text: id,
271                annotations: None,
272                _meta: Some(m),
273            })
274        } else {
275            // Empty File: nothing to encode. Produce a Text("") carrier
276            // with no markers. Reverse will land it as a Text part —
277            // which is a minor round-trip loss for the (unusual)
278            // empty-File case. Document this in the round-trip caveats.
279            ContentBlock::Text(super::TextContent {
280                text: String::new(),
281                annotations: None,
282                _meta: None,
283            })
284        }
285    }
286}
287
288/// Build a single-entry `_meta` map.
289fn single_meta(
290    key: &str,
291    value: String,
292) -> indexmap::IndexMap<String, serde_json::Value> {
293    let mut m = indexmap::IndexMap::new();
294    m.insert(key.to_string(), serde_json::Value::String(value));
295    m
296}
297
298/// Flatten a `RichContent` into the MCP `Vec<ContentBlock>` shape used
299/// by `POST /notify` and tool results. `RichContent::Text` yields a
300/// single text block; `RichContent::Parts` delegates per-element to
301/// [`From<RichContentPart>`].
302impl From<crate::agent::completions::message::RichContent>
303    for Vec<ContentBlock>
304{
305    fn from(content: crate::agent::completions::message::RichContent) -> Self {
306        use crate::agent::completions::message::RichContent;
307        match content {
308            RichContent::Text(text) => {
309                vec![ContentBlock::Text(super::TextContent {
310                    text,
311                    annotations: None,
312                    _meta: None,
313                })]
314            }
315            RichContent::Parts(parts) => {
316                parts.into_iter().map(Into::into).collect()
317            }
318        }
319    }
320}
321
322#[cfg(test)]
323mod round_trip_tests {
324    use super::*;
325    use crate::agent::completions::message::{
326        File, ImageUrl, ImageUrlDetail, InputAudio, RichContentPart, VideoUrl,
327    };
328
329    /// Normalize a RichContentPart for round-trip comparison: clear
330    /// `File::filename` (which round-trips via `_meta` but the test
331    /// candidates set it to verify the marker is honored) — used
332    /// only when the test expects a documented loss.
333    fn norm(part: &mut RichContentPart) {
334        // Currently nothing to clear universally; filename round-
335        // trips via the `objectiveai/filename` marker. Kept as a
336        // hook for future documented losses.
337        let _ = part;
338    }
339
340    fn assert_round_trips(part: RichContentPart) {
341        let mut expected = part.clone();
342        norm(&mut expected);
343        let block: ContentBlock = part.into();
344        let mut back: RichContentPart = block.into();
345        norm(&mut back);
346        assert_eq!(
347            expected, back,
348            "round-trip mismatch: expected {expected:?}, got {back:?}"
349        );
350    }
351
352    #[test]
353    fn rt_text_plain() {
354        assert_round_trips(RichContentPart::Text {
355            text: "hello world".into(),
356        });
357    }
358
359    #[test]
360    fn rt_image_url_data_url_no_detail() {
361        assert_round_trips(RichContentPart::ImageUrl {
362            image_url: ImageUrl {
363                url: "data:image/png;base64,iVBORw0KGgo".into(),
364                detail: None,
365            },
366        });
367    }
368
369    #[test]
370    fn rt_image_url_data_url_with_detail_high() {
371        assert_round_trips(RichContentPart::ImageUrl {
372            image_url: ImageUrl {
373                url: "data:image/png;base64,iVBORw0KGgo".into(),
374                detail: Some(ImageUrlDetail::High),
375            },
376        });
377    }
378
379    #[test]
380    fn rt_image_url_data_url_with_detail_low() {
381        assert_round_trips(RichContentPart::ImageUrl {
382            image_url: ImageUrl {
383                url: "data:image/jpeg;base64,/9j/4AAQ".into(),
384                detail: Some(ImageUrlDetail::Low),
385            },
386        });
387    }
388
389    #[test]
390    fn rt_image_url_remote_url_no_detail() {
391        assert_round_trips(RichContentPart::ImageUrl {
392            image_url: ImageUrl {
393                url: "https://example.com/a.png".into(),
394                detail: None,
395            },
396        });
397    }
398
399    #[test]
400    fn rt_image_url_remote_url_with_detail() {
401        assert_round_trips(RichContentPart::ImageUrl {
402            image_url: ImageUrl {
403                url: "https://example.com/a.png".into(),
404                detail: Some(ImageUrlDetail::Auto),
405            },
406        });
407    }
408
409    #[test]
410    fn rt_input_audio() {
411        assert_round_trips(RichContentPart::InputAudio {
412            input_audio: InputAudio {
413                data: "SUQzBAA".into(),
414                format: "audio/mpeg".into(),
415            },
416        });
417    }
418
419    #[test]
420    fn rt_input_video_data_url() {
421        assert_round_trips(RichContentPart::InputVideo {
422            video_url: VideoUrl {
423                url: "data:video/mp4;base64,AAAA".into(),
424            },
425        });
426    }
427
428    #[test]
429    fn rt_input_video_remote_url() {
430        assert_round_trips(RichContentPart::InputVideo {
431            video_url: VideoUrl {
432                url: "https://example.com/v.mp4".into(),
433            },
434        });
435    }
436
437    #[test]
438    fn rt_file_with_file_data_no_filename() {
439        assert_round_trips(RichContentPart::File {
440            file: File {
441                file_data: Some("JVBERi0".into()),
442                filename: None,
443                file_id: None,
444                file_url: None,
445            },
446        });
447    }
448
449    #[test]
450    fn rt_file_with_file_data_and_filename() {
451        assert_round_trips(RichContentPart::File {
452            file: File {
453                file_data: Some("JVBERi0".into()),
454                filename: Some("report.pdf".into()),
455                file_id: None,
456                file_url: None,
457            },
458        });
459    }
460
461    #[test]
462    fn rt_file_with_file_url() {
463        assert_round_trips(RichContentPart::File {
464            file: File {
465                file_data: None,
466                filename: Some("remote.bin".into()),
467                file_id: None,
468                file_url: Some("https://example.com/x.bin".into()),
469            },
470        });
471    }
472
473    #[test]
474    fn rt_file_with_file_id() {
475        assert_round_trips(RichContentPart::File {
476            file: File {
477                file_data: None,
478                filename: Some("upstream-name.txt".into()),
479                file_id: Some("file-abc123".into()),
480                file_url: None,
481            },
482        });
483    }
484
485    /// Documented round-trip exception (case 1): multi-field File
486    /// collapses to the highest-precedence field (file_data >
487    /// file_url > file_id). The reverse only recovers the primary
488    /// field plus `filename`.
489    #[test]
490    fn rt_file_multifield_collapses_to_file_data() {
491        let input = RichContentPart::File {
492            file: File {
493                file_data: Some("JVBERi0".into()),
494                filename: Some("multi.bin".into()),
495                file_id: Some("ignored-id".into()),
496                file_url: Some("https://example.com/ignored".into()),
497            },
498        };
499        let block: ContentBlock = input.into();
500        let back: RichContentPart = block.into();
501        let expected = RichContentPart::File {
502            file: File {
503                file_data: Some("JVBERi0".into()),
504                filename: Some("multi.bin".into()),
505                file_id: None,
506                file_url: None,
507            },
508        };
509        assert_eq!(back, expected);
510    }
511
512    /// Documented round-trip exception (case 2): a Text part whose
513    /// body is a well-formed data URL forward-converts to Text(t)
514    /// with no marker; the reverse spots the data URL and returns
515    /// a media variant. Lock the actual behaviour in here so a
516    /// future regression that "fixes" this case is caught.
517    #[test]
518    fn rt_text_containing_data_url_decodes_to_media() {
519        let input = RichContentPart::Text {
520            text: "data:image/png;base64,iVBORw0KGgo".into(),
521        };
522        let block: ContentBlock = input.into();
523        let back: RichContentPart = block.into();
524        assert!(
525            matches!(back, RichContentPart::ImageUrl { .. }),
526            "expected media, got {back:?}"
527        );
528    }
529}