objectiveai_sdk/mcp/tool/content_block.rs
1//! MCP content block enum.
2//!
3//! A content block is the union of all content types that can appear in
4//! prompts, tool results, and sampling messages.
5//!
6//! # Round-trip with [`RichContentPart`]
7//!
8//! `From<RichContentPart> for ContentBlock` always produces one of
9//! [`ContentBlock::Text`], [`ContentBlock::Image`], or
10//! [`ContentBlock::Audio`] — never `EmbeddedResource` or
//! `ResourceLink`. Variants that have no native MCP carrier
//! (`InputVideo`, `VideoUrl`, `File`) and remote-URL `ImageUrl`s
//! land as `Text` blocks. The reverse `From<ContentBlock>` arm
//! recovers the original variant from the `_meta` markers or, when
//! the body is a data URL with no `kind` marker, from the data URL
//! itself, and rebuilds the identical [`RichContentPart`].
16//!
17//! Round-trip property: for every `RichContentPart` value `p`,
18//! `RichContentPart::from(ContentBlock::from(p.clone())) == p` —
19//! with three documented exceptions:
20//!
21//! 1. **`File` multi-field collapse.** When two or more of
22//! `file_data`, `file_url`, `file_id` are set on the same
23//! `File`, the forward conversion picks one by precedence
24//! (`file_data` > `file_url` > `file_id`) and the others are
25//! dropped. `filename` rides through losslessly via the
26//! `objectiveai/filename` meta marker.
27//! 2. **`RichContentPart::Text` containing a base64 data URL.**
28//! `RichContentPart::Text { text: "data:image/png;base64,..." }`
29//! forward-converts to `ContentBlock::Text(t)` with the same
30//! body and no `kind` marker; the reverse arm spots the
31//! data-URL shape and returns a media variant (`ImageUrl`,
32//! `InputAudio`, etc.) rather than `Text`. This is intentional —
33//! a Text payload that happens to be a well-formed data URL is
34//! treated as media on every other entry point (the
35//! `From<TextContent>` arm in the SDK does the same thing), and
36//! splitting the behaviour here would be more surprising than
37//! the round-trip loss.
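//! 3. **`RichContentPart::VideoUrl` collapses to `InputVideo`.** The
//!    forward path treats `InputVideo` and `VideoUrl` as the same
//!    carrier (data URL → unmarked `Text`, remote URL → `Text` with
//!    `kind = "input_video_remote"`), so the reverse arm always
//!    rebuilds an `InputVideo` regardless of which variant the
//!    forward call started with. Intentional: the distinction was
//!    only meaningful for the legacy double-encoding, and unifying
//!    the two variants halves the marker surface area.
//!
//! Additionally, a `File` with none of `file_data`, `file_url`, or
//! `file_id` set forward-converts to an empty `Text` block with no
//! markers (any `filename` is dropped), so the reverse arm has
//! nothing to rebuild a `File` from and is expected to yield a `Text`
//! part instead.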
46//!
47//! ## `_meta` markers
48//!
49//! Three keys, all namespaced under `objectiveai/`:
50//!
51//! - **`objectiveai/kind`** (Text carrier only) — enum string,
52//! tells the reverse arm the Text block is the encoded form of a
53//! non-Text variant. Values:
54//! - `"image_url_remote"`: body is a remote URL for an
55//! [`RichContentPart::ImageUrl`].
56//! - `"input_video_remote"`: body is a remote URL for an
57//! [`RichContentPart::InputVideo`]. Also emitted for remote
58//! [`RichContentPart::VideoUrl`] — the two video variants share
59//! a forward path and both round-trip to `InputVideo`.
60//! - `"file_url"`: body is a remote URL for a
61//! [`RichContentPart::File`] whose primary field is `file_url`.
62//! - `"file_id"`: body is an opaque ID for a
63//! [`RichContentPart::File`] whose primary field is `file_id`.
64//! - **`objectiveai/image_detail`** (Image carrier, or Text with
65//! `kind: "image_url_remote"`) — preserves
66//! [`ImageUrl::detail`](crate::agent::completions::message::ImageUrl::detail).
67//! - **`objectiveai/filename`** (any carrier representing a
68//! [`RichContentPart::File`]) — preserves
69//! [`File::filename`](crate::agent::completions::message::File::filename).
70
71use schemars::JsonSchema;
72use serde::{Deserialize, Serialize};
73
/// `_meta` key holding the variant tag for a `Text` carrier that encodes
/// a non-Text part. Like the other marker keys below it is namespaced
/// under `objectiveai/`, so we don't collide with any official MCP
/// metadata convention.
const META_KIND: &str = "objectiveai/kind";
/// `_meta` key preserving `ImageUrl::detail`.
const META_IMAGE_DETAIL: &str = "objectiveai/image_detail";
/// `_meta` key preserving `File::filename`.
const META_FILENAME: &str = "objectiveai/filename";

/// `objectiveai/kind` enum tag values.
///
/// Tag: the Text body is a remote URL for an `ImageUrl`.
const KIND_IMAGE_URL_REMOTE: &str = "image_url_remote";
/// Tag: the Text body is a remote video URL (rebuilt as `InputVideo`).
const KIND_INPUT_VIDEO_REMOTE: &str = "input_video_remote";
/// Tag: the Text body is the `file_url` of a `File`.
const KIND_FILE_URL: &str = "file_url";
/// Tag: the Text body is the `file_id` of a `File`.
const KIND_FILE_ID: &str = "file_id";
85
86/// A content block that can be used in prompts and tool results.
87#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
88#[serde(tag = "type")]
89#[schemars(rename = "mcp.tool.ContentBlock")]
90pub enum ContentBlock {
91 /// Text content.
92 #[serde(rename = "text")]
93 #[schemars(title = "Text")]
94 Text(super::TextContent),
95 /// Image content (base64-encoded).
96 #[serde(rename = "image")]
97 #[schemars(title = "Image")]
98 Image(super::ImageContent),
99 /// Audio content (base64-encoded).
100 #[serde(rename = "audio")]
101 #[schemars(title = "Audio")]
102 Audio(super::AudioContent),
103 /// A resource link.
104 #[serde(rename = "resource_link")]
105 #[schemars(title = "ResourceLink")]
106 ResourceLink(super::ResourceLink),
107 /// An embedded resource.
108 #[serde(rename = "resource")]
109 #[schemars(title = "EmbeddedResource")]
110 EmbeddedResource(super::EmbeddedResource),
111}
112
113/// Convert a single `RichContentPart` into a `ContentBlock`.
114///
115/// Forward path produces only `Text`, `Image`, and `Audio` carriers
116/// — never `EmbeddedResource` or `ResourceLink`. See the
117/// module-level docs for the round-trip property and the `_meta`
118/// marker catalogue.
119///
120/// Upstream-specific converters (`claude_agent_sdk`, `codex_sdk`)
121/// use stricter `TryFrom` impls that reject unsupported parts. This
122/// `From` is the generic, round-trip-preserving path used by
123/// `agent/completions/notify` and the MCP tool-response formatter.
124impl From<crate::agent::completions::message::RichContentPart>
125 for ContentBlock
126{
127 fn from(part: crate::agent::completions::message::RichContentPart) -> Self {
128 use crate::agent::completions::message::RichContentPart;
129 match part {
130 RichContentPart::Text { text } => {
131 ContentBlock::Text(super::TextContent {
132 text,
133 annotations: None,
134 _meta: None,
135 })
136 }
137 RichContentPart::ImageUrl { image_url } => image_url.into(),
138 RichContentPart::InputAudio { input_audio } => input_audio.into(),
139 // Both video variants share a forward path; reverse always
140 // rebuilds `InputVideo`. See the third round-trip exception
141 // in the module-level docs.
142 RichContentPart::InputVideo { video_url } => video_url.into(),
143 RichContentPart::VideoUrl { video_url } => video_url.into(),
144 RichContentPart::File { file } => file.into(),
145 }
146 }
147}
148
149/// Direct conversion from a typed `ImageUrl` to a `ContentBlock`.
150/// Same body as the `RichContentPart::ImageUrl` arm of
151/// [`From<RichContentPart> for ContentBlock`] — kept independent so
152/// per-leaf `CommandResponse::into_mcp` impls (whose `Response` is
153/// already an `ImageUrl`) can call `image_url.into()` without first
154/// wrapping in `RichContentPart`.
155impl From<crate::agent::completions::message::ImageUrl> for ContentBlock {
156 fn from(image_url: crate::agent::completions::message::ImageUrl) -> Self {
157 // Serialize detail (an enum) once via serde_json so
158 // we hand markers a Value::String("auto"|"low"|"high"),
159 // not the typed enum literal.
160 let detail_value = image_url
161 .detail
162 .as_ref()
163 .and_then(|d| serde_json::to_value(d).ok());
164 match super::ImageContent::try_from(image_url) {
165 Ok(mut ic) => {
166 // Data-URL path: lossless Image carrier.
167 // Detail (when present) rides in _meta.
168 if let Some(v) = detail_value {
169 let mut m = indexmap::IndexMap::new();
170 m.insert(META_IMAGE_DETAIL.to_string(), v);
171 ic._meta = Some(m);
172 }
173 ContentBlock::Image(ic)
174 }
175 Err(err) => {
176 // Remote URL: stash on Text with kind marker
177 // so reverse can rebuild ImageUrl.
178 let mut meta = indexmap::IndexMap::new();
179 meta.insert(
180 META_KIND.to_string(),
181 serde_json::Value::String(
182 KIND_IMAGE_URL_REMOTE.to_string(),
183 ),
184 );
185 if let Some(v) = detail_value {
186 meta.insert(META_IMAGE_DETAIL.to_string(), v);
187 }
188 ContentBlock::Text(super::TextContent {
189 text: err.url,
190 annotations: None,
191 _meta: Some(meta),
192 })
193 }
194 }
195 }
196}
197
198/// Direct conversion from a typed `InputAudio` to a `ContentBlock`.
199/// Same body as the `RichContentPart::InputAudio` arm of
200/// [`From<RichContentPart> for ContentBlock`].
201impl From<crate::agent::completions::message::InputAudio> for ContentBlock {
202 fn from(input_audio: crate::agent::completions::message::InputAudio) -> Self {
203 ContentBlock::Audio(input_audio.into())
204 }
205}
206
207/// Direct conversion from a typed `VideoUrl` to a `ContentBlock`.
208/// Shared forward path for both `RichContentPart::InputVideo` and
209/// `RichContentPart::VideoUrl`: data-URL videos round-trip via the
210/// default reverse heuristic (parse_data_url → video/* mime →
211/// InputVideo), so no marker. Remote URLs get
212/// `META_KIND = "input_video_remote"` so the reverse rebuilds an
213/// `InputVideo`. See the third round-trip exception in the
214/// module-level docs.
215impl From<crate::agent::completions::message::VideoUrl> for ContentBlock {
216 fn from(video_url: crate::agent::completions::message::VideoUrl) -> Self {
217 if crate::data_url::parse_data_url(&video_url.url).is_some() {
218 ContentBlock::Text(super::TextContent {
219 text: video_url.url,
220 annotations: None,
221 _meta: None,
222 })
223 } else {
224 ContentBlock::Text(super::TextContent {
225 text: video_url.url,
226 annotations: None,
227 _meta: Some(single_meta(
228 META_KIND,
229 KIND_INPUT_VIDEO_REMOTE.to_string(),
230 )),
231 })
232 }
233 }
234}
235
236/// Direct conversion from a typed `File` to a `ContentBlock`.
237/// Multi-field collapse:
238/// `file_data` > `file_url` > `file_id` by precedence. Lower-priority
239/// fields are dropped; `filename` rides through via `_meta`.
240impl From<crate::agent::completions::message::File> for ContentBlock {
241 fn from(file: crate::agent::completions::message::File) -> Self {
242 let filename = file.filename.clone();
243 if let Some(blob) = file.file_data {
244 // Encode as a Text(data:application/octet-stream;base64,...)
245 // — the heuristic reverse decodes it into a File. Filename
246 // rides in _meta.
247 let body = format!("data:application/octet-stream;base64,{blob}");
248 let meta = filename.map(|n| single_meta(META_FILENAME, n));
249 ContentBlock::Text(super::TextContent {
250 text: body,
251 annotations: None,
252 _meta: meta,
253 })
254 } else if let Some(url) = file.file_url {
255 let mut m = single_meta(META_KIND, KIND_FILE_URL.to_string());
256 if let Some(n) = filename {
257 m.insert(META_FILENAME.to_string(), serde_json::Value::String(n));
258 }
259 ContentBlock::Text(super::TextContent {
260 text: url,
261 annotations: None,
262 _meta: Some(m),
263 })
264 } else if let Some(id) = file.file_id {
265 let mut m = single_meta(META_KIND, KIND_FILE_ID.to_string());
266 if let Some(n) = filename {
267 m.insert(META_FILENAME.to_string(), serde_json::Value::String(n));
268 }
269 ContentBlock::Text(super::TextContent {
270 text: id,
271 annotations: None,
272 _meta: Some(m),
273 })
274 } else {
275 // Empty File: nothing to encode. Produce a Text("") carrier
276 // with no markers. Reverse will land it as a Text part —
277 // which is a minor round-trip loss for the (unusual)
278 // empty-File case. Document this in the round-trip caveats.
279 ContentBlock::Text(super::TextContent {
280 text: String::new(),
281 annotations: None,
282 _meta: None,
283 })
284 }
285 }
286}
287
288/// Build a single-entry `_meta` map.
289fn single_meta(
290 key: &str,
291 value: String,
292) -> indexmap::IndexMap<String, serde_json::Value> {
293 let mut m = indexmap::IndexMap::new();
294 m.insert(key.to_string(), serde_json::Value::String(value));
295 m
296}
297
298/// Flatten a `RichContent` into the MCP `Vec<ContentBlock>` shape used
299/// by `POST /notify` and tool results. `RichContent::Text` yields a
300/// single text block; `RichContent::Parts` delegates per-element to
301/// [`From<RichContentPart>`].
302impl From<crate::agent::completions::message::RichContent>
303 for Vec<ContentBlock>
304{
305 fn from(content: crate::agent::completions::message::RichContent) -> Self {
306 use crate::agent::completions::message::RichContent;
307 match content {
308 RichContent::Text(text) => {
309 vec![ContentBlock::Text(super::TextContent {
310 text,
311 annotations: None,
312 _meta: None,
313 })]
314 }
315 RichContent::Parts(parts) => {
316 parts.into_iter().map(Into::into).collect()
317 }
318 }
319 }
320}
321
#[cfg(test)]
mod round_trip_tests {
    use super::*;
    use crate::agent::completions::message::{
        File, ImageUrl, ImageUrlDetail, InputAudio, RichContentPart, VideoUrl,
    };

    /// Normalization hook applied to both sides before a round-trip
    /// comparison.
    ///
    /// Currently a no-op: nothing needs clearing universally, because
    /// `File::filename` round-trips via the `objectiveai/filename`
    /// marker. Kept so a future documented loss can be masked in one
    /// place.
    fn norm(part: &mut RichContentPart) {
        let _ = part;
    }

    /// Assert that `part` survives `RichContentPart` → `ContentBlock` →
    /// `RichContentPart` unchanged (after `norm`).
    fn assert_round_trips(part: RichContentPart) {
        let mut expected = part.clone();
        norm(&mut expected);
        let block: ContentBlock = part.into();
        let mut back: RichContentPart = block.into();
        norm(&mut back);
        assert_eq!(
            expected, back,
            "round-trip mismatch: expected {expected:?}, got {back:?}"
        );
    }

    #[test]
    fn rt_text_plain() {
        assert_round_trips(RichContentPart::Text {
            text: "hello world".into(),
        });
    }

    #[test]
    fn rt_image_url_data_url_no_detail() {
        assert_round_trips(RichContentPart::ImageUrl {
            image_url: ImageUrl {
                url: "data:image/png;base64,iVBORw0KGgo".into(),
                detail: None,
            },
        });
    }

    #[test]
    fn rt_image_url_data_url_with_detail_high() {
        assert_round_trips(RichContentPart::ImageUrl {
            image_url: ImageUrl {
                url: "data:image/png;base64,iVBORw0KGgo".into(),
                detail: Some(ImageUrlDetail::High),
            },
        });
    }

    #[test]
    fn rt_image_url_data_url_with_detail_low() {
        assert_round_trips(RichContentPart::ImageUrl {
            image_url: ImageUrl {
                url: "data:image/jpeg;base64,/9j/4AAQ".into(),
                detail: Some(ImageUrlDetail::Low),
            },
        });
    }

    #[test]
    fn rt_image_url_remote_url_no_detail() {
        assert_round_trips(RichContentPart::ImageUrl {
            image_url: ImageUrl {
                url: "https://example.com/a.png".into(),
                detail: None,
            },
        });
    }

    #[test]
    fn rt_image_url_remote_url_with_detail() {
        assert_round_trips(RichContentPart::ImageUrl {
            image_url: ImageUrl {
                url: "https://example.com/a.png".into(),
                detail: Some(ImageUrlDetail::Auto),
            },
        });
    }

    #[test]
    fn rt_input_audio() {
        assert_round_trips(RichContentPart::InputAudio {
            input_audio: InputAudio {
                data: "SUQzBAA".into(),
                format: "audio/mpeg".into(),
            },
        });
    }

    #[test]
    fn rt_input_video_data_url() {
        assert_round_trips(RichContentPart::InputVideo {
            video_url: VideoUrl {
                url: "data:video/mp4;base64,AAAA".into(),
            },
        });
    }

    #[test]
    fn rt_input_video_remote_url() {
        assert_round_trips(RichContentPart::InputVideo {
            video_url: VideoUrl {
                url: "https://example.com/v.mp4".into(),
            },
        });
    }

    #[test]
    fn rt_file_with_file_data_no_filename() {
        assert_round_trips(RichContentPart::File {
            file: File {
                file_data: Some("JVBERi0".into()),
                filename: None,
                file_id: None,
                file_url: None,
            },
        });
    }

    #[test]
    fn rt_file_with_file_data_and_filename() {
        assert_round_trips(RichContentPart::File {
            file: File {
                file_data: Some("JVBERi0".into()),
                filename: Some("report.pdf".into()),
                file_id: None,
                file_url: None,
            },
        });
    }

    #[test]
    fn rt_file_with_file_url() {
        assert_round_trips(RichContentPart::File {
            file: File {
                file_data: None,
                filename: Some("remote.bin".into()),
                file_id: None,
                file_url: Some("https://example.com/x.bin".into()),
            },
        });
    }

    #[test]
    fn rt_file_with_file_id() {
        assert_round_trips(RichContentPart::File {
            file: File {
                file_data: None,
                filename: Some("upstream-name.txt".into()),
                file_id: Some("file-abc123".into()),
                file_url: None,
            },
        });
    }

    /// Documented round-trip exception (case 1): multi-field File
    /// collapses to the highest-precedence field (file_data >
    /// file_url > file_id). The reverse only recovers the primary
    /// field plus `filename`.
    #[test]
    fn rt_file_multifield_collapses_to_file_data() {
        let input = RichContentPart::File {
            file: File {
                file_data: Some("JVBERi0".into()),
                filename: Some("multi.bin".into()),
                file_id: Some("ignored-id".into()),
                file_url: Some("https://example.com/ignored".into()),
            },
        };
        let block: ContentBlock = input.into();
        let back: RichContentPart = block.into();
        let expected = RichContentPart::File {
            file: File {
                file_data: Some("JVBERi0".into()),
                filename: Some("multi.bin".into()),
                file_id: None,
                file_url: None,
            },
        };
        assert_eq!(back, expected);
    }

    /// Documented round-trip exception (case 2): a Text part whose
    /// body is a well-formed data URL forward-converts to Text(t)
    /// with no marker; the reverse spots the data URL and returns
    /// a media variant. Lock the actual behaviour in here so a
    /// future regression that "fixes" this case is caught.
    #[test]
    fn rt_text_containing_data_url_decodes_to_media() {
        let input = RichContentPart::Text {
            text: "data:image/png;base64,iVBORw0KGgo".into(),
        };
        let block: ContentBlock = input.into();
        let back: RichContentPart = block.into();
        assert!(
            matches!(back, RichContentPart::ImageUrl { .. }),
            "expected media, got {back:?}"
        );
    }

    /// Documented round-trip exception (case 3), data-URL flavour:
    /// `VideoUrl` shares the forward path with `InputVideo`, so the
    /// reverse rebuilds an `InputVideo` carrying the same URL.
    #[test]
    fn rt_video_url_data_url_collapses_to_input_video() {
        let input = RichContentPart::VideoUrl {
            video_url: VideoUrl {
                url: "data:video/mp4;base64,AAAA".into(),
            },
        };
        let block: ContentBlock = input.into();
        let back: RichContentPart = block.into();
        let expected = RichContentPart::InputVideo {
            video_url: VideoUrl {
                url: "data:video/mp4;base64,AAAA".into(),
            },
        };
        assert_eq!(back, expected);
    }

    /// Documented round-trip exception (case 3), remote-URL flavour:
    /// the `input_video_remote` marker is emitted for `VideoUrl` too,
    /// so the reverse rebuilds an `InputVideo`.
    #[test]
    fn rt_video_url_remote_url_collapses_to_input_video() {
        let input = RichContentPart::VideoUrl {
            video_url: VideoUrl {
                url: "https://example.com/v.mp4".into(),
            },
        };
        let block: ContentBlock = input.into();
        let back: RichContentPart = block.into();
        let expected = RichContentPart::InputVideo {
            video_url: VideoUrl {
                url: "https://example.com/v.mp4".into(),
            },
        };
        assert_eq!(back, expected);
    }

    /// Forward-only check for the empty-File case: with no payload
    /// field set there is nothing to encode, so the carrier is an empty
    /// Text block with no markers — the filename is not preserved.
    #[test]
    fn empty_file_forwards_to_unmarked_empty_text() {
        let block: ContentBlock = RichContentPart::File {
            file: File {
                file_data: None,
                filename: Some("orphan.bin".into()),
                file_id: None,
                file_url: None,
            },
        }
        .into();
        match block {
            ContentBlock::Text(carrier) => {
                assert!(carrier.text.is_empty());
                assert!(carrier._meta.is_none());
            }
            other => panic!("expected Text carrier, got {other:?}"),
        }
    }
}