Skip to main content

objectiveai_sdk/agent/completions/message/
rich_content.rs

1//! Rich content types for user/assistant messages (supports multimodal input).
2
3use crate::functions;
4use functions::expression::{
5    ExpressionError, FromStarlarkValue, ToStarlarkValue, WithExpression,
6};
7use serde::{Deserialize, Serialize};
8use schemars::JsonSchema;
9use starlark::values::dict::{
10    AllocDict as StarlarkAllocDict, DictRef as StarlarkDictRef,
11};
12use starlark::values::{
13    Heap as StarlarkHeap, UnpackValue, Value as StarlarkValue,
14};
15
/// Rich content for user/assistant messages (supports multimodal input).
// Untagged in serde: the value is (de)serialized as the bare payload of
// whichever variant it is, i.e. a plain string for `Text` and a sequence of
// parts for `Parts`. No wrapper object or discriminator field is emitted.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[serde(untagged)]
#[schemars(rename = "agent.completions.message.RichContent")]
pub enum RichContent {
    /// Plain text content.
    #[schemars(title = "Text")]
    Text(String),
    /// Multi-part content (text, images, audio, video, files).
    #[schemars(title = "Parts")]
    Parts(Vec<RichContentPart>),
}
28
29impl RichContent {
30    pub fn push(&mut self, other: &RichContent) {
31        match (&mut *self, other) {
32            (RichContent::Text(self_text), RichContent::Text(other_text)) => {
33                self_text.push_str(&other_text);
34            }
35            (RichContent::Text(self_text), RichContent::Parts(other_parts)) => {
36                let mut parts = Vec::with_capacity(1 + other_parts.len());
37                parts.push(RichContentPart::Text {
38                    text: std::mem::take(self_text),
39                });
40                parts.extend(other_parts.iter().cloned());
41                *self = RichContent::Parts(parts);
42            }
43            (RichContent::Parts(self_parts), RichContent::Text(other_text)) => {
44                self_parts.push(RichContentPart::Text {
45                    text: other_text.clone(),
46                });
47            }
48            (
49                RichContent::Parts(self_parts),
50                RichContent::Parts(other_parts),
51            ) => {
52                self_parts.extend(other_parts.iter().cloned());
53            }
54        }
55    }
56
57    /// Prepares the content by normalizing parts.
58    ///
59    /// This consolidates consecutive text parts, removes empty parts,
60    /// and converts single-part content to plain text.
61    pub fn prepare(&mut self) {
62        // nothing to prepare for plain text
63        let parts = match self {
64            RichContent::Text(_) => return,
65            RichContent::Parts(parts) => parts,
66        };
67
68        // prepare all parts
69        parts.iter_mut().for_each(RichContentPart::prepare);
70
71        // join consecutive text parts + remove empty parts
72        let mut final_parts = Vec::with_capacity(parts.len());
73        let mut buffer: Option<String> = None;
74        for part in parts.drain(..) {
75            match part {
76                part if part.is_empty() => continue,
77                RichContentPart::Text { text } => {
78                    if let Some(buffer) = &mut buffer {
79                        buffer.push_str(&text);
80                    } else {
81                        buffer = Some(text);
82                    }
83                }
84                part => {
85                    if let Some(buffer) = buffer.take() {
86                        final_parts
87                            .push(RichContentPart::Text { text: buffer });
88                    }
89                    final_parts.push(part);
90                }
91            }
92        }
93        if let Some(buffer) = buffer.take() {
94            final_parts.push(RichContentPart::Text { text: buffer });
95        }
96
97        // replace self with final parts
98        if final_parts.len() == 1
99            && matches!(&final_parts[0], RichContentPart::Text { .. })
100        {
101            match final_parts.into_iter().next() {
102                Some(RichContentPart::Text { text }) => {
103                    *self = RichContent::Text(text);
104                }
105                _ => unreachable!(),
106            }
107        } else {
108            *self = RichContent::Parts(final_parts);
109        }
110    }
111
112    /// Returns `true` if the content is empty.
113    pub fn is_empty(&self) -> bool {
114        match self {
115            RichContent::Text(text) => text.is_empty(),
116            RichContent::Parts(parts) => parts.is_empty(),
117        }
118    }
119
120    /// Extracts media files from this content.
121    ///
122    /// Returns `(content_json, files)` where `content_json` is the content
123    /// with extractable media replaced by `{"type": "reference", "path": ...}`
124    /// references, and `files` is the list of [`LogFile`]s.
125    ///
126    /// `route_base` is the route prefix (e.g. `"agents/completions"`).
127    /// `id` and `message_index` identify the parent message.
128    #[cfg(feature = "filesystem")]
129    pub fn extract_media(
130        self,
131        route_base: &str,
132        id: &str,
133        message_index: u64,
134    ) -> (serde_json::Value, Vec<crate::filesystem::logs::LogFile>) {
135        let parts = match self {
136            RichContent::Text(text) => return (serde_json::Value::String(text), Vec::new()),
137            RichContent::Parts(parts) => parts,
138        };
139
140        let mut json_parts = Vec::with_capacity(parts.len());
141        let mut files = Vec::new();
142
143        for (part_idx, part) in parts.into_iter().enumerate() {
144            let fc_and_type: Option<(super::FileContent, &str)> = match &part {
145                RichContentPart::ImageUrl { image_url } => {
146                    image_url.file_content().map(|fc| (fc, "image"))
147                }
148                RichContentPart::InputAudio { input_audio } => {
149                    input_audio.file_content().map(|fc| (fc, "audio"))
150                }
151                RichContentPart::InputVideo { video_url }
152                | RichContentPart::VideoUrl { video_url } => {
153                    video_url.file_content().map(|fc| (fc, "video"))
154                }
155                RichContentPart::File { file } => {
156                    file.file_content().map(|fc| (fc, "file"))
157                }
158                _ => None,
159            };
160
161            if let Some((fc, media_type)) = fc_and_type {
162                if let Ok(decoded) = fc.decode() {
163                    let log_file = crate::filesystem::logs::LogFile {
164                        route: format!("{route_base}/messages/{media_type}"),
165                        id: id.to_string(),
166                        message_index: Some(message_index),
167                        media_index: Some(part_idx as u64),
168                        extension: fc.extension.to_string(),
169                        content: decoded,
170                    };
171                    json_parts.push(serde_json::json!({
172                        "type": "reference",
173                        "path": log_file.path(),
174                    }));
175                    files.push(log_file);
176                } else {
177                    json_parts.push(serde_json::to_value(&part).unwrap());
178                }
179            } else {
180                json_parts.push(serde_json::to_value(&part).unwrap());
181            }
182        }
183
184        (serde_json::Value::Array(json_parts), files)
185    }
186
187    /// Computes a content-addressed ID for this content.
188    pub fn id(&self) -> String {
189        let mut hasher = twox_hash::XxHash3_128::with_seed(0);
190        hasher.write(serde_json::to_string(self).unwrap().as_bytes());
191        format!("{:0>22}", base62::encode(hasher.finish_128()))
192    }
193
194    /// Validates that this content contains only text or image parts.
195    ///
196    /// Used by upstream agent definitions whose prefix/suffix content
197    /// rendering can only express text and image media (audio, video, and
198    /// file parts have no representation in those upstreams' prompts).
199    /// Returns `Err` naming the offending part variant if any non-text /
200    /// non-image part is present.
201    pub fn validate_text_or_image_only(&self) -> Result<(), String> {
202        match self {
203            RichContent::Text(_) => Ok(()),
204            RichContent::Parts(parts) => {
205                for (idx, part) in parts.iter().enumerate() {
206                    match part {
207                        RichContentPart::Text { .. }
208                        | RichContentPart::ImageUrl { .. } => {}
209                        RichContentPart::InputAudio { .. } => {
210                            return Err(format!(
211                                "part[{idx}] has unsupported media type `input_audio`; only text and image parts are allowed"
212                            ));
213                        }
214                        RichContentPart::InputVideo { .. } => {
215                            return Err(format!(
216                                "part[{idx}] has unsupported media type `input_video`; only text and image parts are allowed"
217                            ));
218                        }
219                        RichContentPart::VideoUrl { .. } => {
220                            return Err(format!(
221                                "part[{idx}] has unsupported media type `video_url`; only text and image parts are allowed"
222                            ));
223                        }
224                        RichContentPart::File { .. } => {
225                            return Err(format!(
226                                "part[{idx}] has unsupported media type `file`; only text and image parts are allowed"
227                            ));
228                        }
229                    }
230                }
231                Ok(())
232            }
233        }
234    }
235}
236
237impl FromStarlarkValue for RichContent {
238    fn from_starlark_value(
239        value: &StarlarkValue,
240    ) -> Result<Self, ExpressionError> {
241        if let Ok(Some(s)) = <&str as UnpackValue>::unpack_value(*value) {
242            return Ok(RichContent::Text(s.to_owned()));
243        }
244        let parts = Vec::<RichContentPart>::from_starlark_value(value)?;
245        Ok(RichContent::Parts(parts))
246    }
247}
248
/// Expression variant of [`RichContent`] for dynamic content.
// Untagged in serde, mirroring `RichContent`: a plain string for `Text`, a
// sequence for `Parts`. Each entry of `Parts` is wrapped in
// `WithExpression`, so it is resolved against parameters when `compile`
// is called rather than being a fixed part.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[serde(untagged)]
#[schemars(rename = "agent.completions.message.RichContentExpression")]
pub enum RichContentExpression {
    /// Plain text content.
    #[schemars(title = "Text")]
    Text(String),
    /// Multi-part content expressions.
    #[schemars(title = "Parts")]
    Parts(
        Vec<functions::expression::WithExpression<RichContentPartExpression>>,
    ),
}
263
264impl RichContentExpression {
265    /// Compiles the expression into a concrete [`RichContent`].
266    pub fn compile(
267        self,
268        params: &functions::expression::Params,
269    ) -> Result<RichContent, functions::expression::ExpressionError> {
270        match self {
271            RichContentExpression::Text(text) => Ok(RichContent::Text(text)),
272            RichContentExpression::Parts(parts) => {
273                let mut compiled_parts = Vec::with_capacity(parts.len());
274                for part in parts {
275                    match part.compile_one_or_many(params)? {
276                        functions::expression::OneOrMany::One(one_part) => {
277                            compiled_parts.push(one_part.compile(params)?);
278                        }
279                        functions::expression::OneOrMany::Many(many_parts) => {
280                            for part in many_parts {
281                                compiled_parts.push(part.compile(params)?);
282                            }
283                        }
284                    }
285                }
286                Ok(RichContent::Parts(compiled_parts))
287            }
288        }
289    }
290}
291
292impl From<RichContent> for RichContentExpression {
293    fn from(content: RichContent) -> Self {
294        match content {
295            RichContent::Text(text) => RichContentExpression::Text(text),
296            RichContent::Parts(parts) => RichContentExpression::Parts(
297                parts
298                    .into_iter()
299                    .map(RichContentPartExpression::from)
300                    .map(WithExpression::Value)
301                    .collect(),
302            ),
303        }
304    }
305}
306
307impl FromStarlarkValue for RichContentExpression {
308    fn from_starlark_value(
309        value: &StarlarkValue,
310    ) -> Result<Self, ExpressionError> {
311        if let Ok(Some(s)) = <&str as UnpackValue>::unpack_value(*value) {
312            return Ok(RichContentExpression::Text(s.to_owned()));
313        }
314        let parts = Vec::<WithExpression<RichContentPartExpression>>::from_starlark_value(value)?;
315        Ok(RichContentExpression::Parts(parts))
316    }
317}
318
/// A part of rich content.
// Internally tagged in serde: the `type` field carries the snake_case
// variant name (`text`, `image_url`, `input_audio`, `input_video`,
// `video_url`, `file`) next to the variant's own payload field.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[serde(tag = "type", rename_all = "snake_case")]
#[schemars(rename = "agent.completions.message.RichContentPart")]
pub enum RichContentPart {
    /// Text content.
    #[schemars(title = "Text")]
    Text { text: String },
    /// An image URL.
    #[schemars(title = "ImageUrl")]
    ImageUrl { image_url: ImageUrl },
    /// Audio input.
    #[schemars(title = "InputAudio")]
    InputAudio { input_audio: InputAudio },
    /// Video input.
    // Carries the same `video_url` payload field as `VideoUrl`; the two
    // variants differ only in their `type` tag.
    #[schemars(title = "InputVideo")]
    InputVideo { video_url: VideoUrl },
    /// A video URL.
    #[schemars(title = "VideoUrl")]
    VideoUrl { video_url: VideoUrl },
    /// A file.
    #[schemars(title = "File")]
    File { file: File },
}
343
344impl RichContentPart {
345    /// Prepares the content part by normalizing optional fields.
346    pub fn prepare(&mut self) {
347        match self {
348            RichContentPart::Text { .. } => {}
349            RichContentPart::ImageUrl { image_url } => {
350                image_url.prepare();
351            }
352            RichContentPart::InputAudio { .. } => {}
353            RichContentPart::InputVideo { .. } => {}
354            RichContentPart::VideoUrl { .. } => {}
355            RichContentPart::File { file } => {
356                file.prepare();
357            }
358        }
359    }
360
361    /// Returns `true` if the content part is empty.
362    pub fn is_empty(&self) -> bool {
363        match self {
364            RichContentPart::Text { text } => text.is_empty(),
365            RichContentPart::ImageUrl { image_url } => image_url.is_empty(),
366            RichContentPart::InputAudio { input_audio } => {
367                input_audio.is_empty()
368            }
369            RichContentPart::InputVideo { video_url } => video_url.is_empty(),
370            RichContentPart::VideoUrl { video_url } => video_url.is_empty(),
371            RichContentPart::File { file } => file.is_empty(),
372        }
373    }
374}
375
376impl ToStarlarkValue for RichContentPart {
377    fn to_starlark_value<'v>(
378        &self,
379        heap: &'v StarlarkHeap,
380    ) -> StarlarkValue<'v> {
381        match self {
382            RichContentPart::Text { text } => heap.alloc(StarlarkAllocDict([
383                ("type", "text".to_starlark_value(heap)),
384                ("text", text.to_starlark_value(heap)),
385            ])),
386            RichContentPart::ImageUrl { image_url } => {
387                heap.alloc(StarlarkAllocDict([
388                    ("type", "image_url".to_starlark_value(heap)),
389                    ("image_url", image_url.to_starlark_value(heap)),
390                ]))
391            }
392            RichContentPart::InputAudio { input_audio } => {
393                heap.alloc(StarlarkAllocDict([
394                    ("type", "input_audio".to_starlark_value(heap)),
395                    ("input_audio", input_audio.to_starlark_value(heap)),
396                ]))
397            }
398            RichContentPart::InputVideo { video_url } => {
399                heap.alloc(StarlarkAllocDict([
400                    ("type", "input_video".to_starlark_value(heap)),
401                    ("video_url", video_url.to_starlark_value(heap)),
402                ]))
403            }
404            RichContentPart::VideoUrl { video_url } => {
405                heap.alloc(StarlarkAllocDict([
406                    ("type", "video_url".to_starlark_value(heap)),
407                    ("video_url", video_url.to_starlark_value(heap)),
408                ]))
409            }
410            RichContentPart::File { file } => heap.alloc(StarlarkAllocDict([
411                ("type", "file".to_starlark_value(heap)),
412                ("file", file.to_starlark_value(heap)),
413            ])),
414        }
415    }
416}
417
418impl FromStarlarkValue for RichContentPart {
419    fn from_starlark_value(
420        value: &StarlarkValue,
421    ) -> Result<Self, ExpressionError> {
422        let dict = StarlarkDictRef::from_value(*value).ok_or_else(|| {
423            ExpressionError::StarlarkConversionError(
424                "RichContentPart: expected dict".into(),
425            )
426        })?;
427        // First pass: find the type
428        let mut typ = None;
429        for (k, v) in dict.iter() {
430            if let Ok(Some("type")) = <&str as UnpackValue>::unpack_value(k) {
431                typ = Some(
432                    <&str as UnpackValue>::unpack_value(v)
433                        .map_err(|e| {
434                            ExpressionError::StarlarkConversionError(
435                                e.to_string(),
436                            )
437                        })?
438                        .ok_or_else(|| {
439                            ExpressionError::StarlarkConversionError(
440                                "RichContentPart: expected string type".into(),
441                            )
442                        })?,
443                );
444                break;
445            }
446        }
447        let typ = typ.ok_or_else(|| {
448            ExpressionError::StarlarkConversionError(
449                "RichContentPart: missing type".into(),
450            )
451        })?;
452        // Second pass: find the payload by expected key
453        let payload_key = match typ {
454            "text" => "text",
455            "image_url" => "image_url",
456            "input_audio" => "input_audio",
457            "input_video" | "video_url" => "video_url",
458            "file" => "file",
459            _ => {
460                return Err(ExpressionError::StarlarkConversionError(format!(
461                    "RichContentPart: unknown type: {}",
462                    typ
463                )));
464            }
465        };
466        let mut payload = None;
467        for (k, v) in dict.iter() {
468            if let Ok(Some(key)) = <&str as UnpackValue>::unpack_value(k) {
469                if key == payload_key {
470                    payload = Some(v);
471                    break;
472                }
473            }
474        }
475        let v = payload.ok_or_else(|| {
476            ExpressionError::StarlarkConversionError(format!(
477                "RichContentPart: missing {}",
478                payload_key
479            ))
480        })?;
481        match typ {
482            "text" => Ok(RichContentPart::Text {
483                text: String::from_starlark_value(&v)?,
484            }),
485            "image_url" => Ok(RichContentPart::ImageUrl {
486                image_url: ImageUrl::from_starlark_value(&v)?,
487            }),
488            "input_audio" => Ok(RichContentPart::InputAudio {
489                input_audio: InputAudio::from_starlark_value(&v)?,
490            }),
491            "input_video" => Ok(RichContentPart::InputVideo {
492                video_url: VideoUrl::from_starlark_value(&v)?,
493            }),
494            "video_url" => Ok(RichContentPart::VideoUrl {
495                video_url: VideoUrl::from_starlark_value(&v)?,
496            }),
497            "file" => Ok(RichContentPart::File {
498                file: File::from_starlark_value(&v)?,
499            }),
500            _ => unreachable!(),
501        }
502    }
503}
504
/// Expression variant of [`RichContentPart`] for dynamic content.
// Same internally tagged layout as `RichContentPart` (`type` field with the
// snake_case variant name). Every payload is wrapped in `WithExpression`
// and is resolved with `compile_one` when the part is compiled.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[serde(tag = "type", rename_all = "snake_case")]
#[schemars(rename = "agent.completions.message.RichContentPartExpression")]
pub enum RichContentPartExpression {
    // Counterpart of `RichContentPart::Text`.
    #[schemars(title = "Text")]
    Text {
        text: functions::expression::WithExpression<String>,
    },
    // Counterpart of `RichContentPart::ImageUrl`.
    #[schemars(title = "ImageUrl")]
    ImageUrl {
        image_url: functions::expression::WithExpression<ImageUrl>,
    },
    // Counterpart of `RichContentPart::InputAudio`.
    #[schemars(title = "InputAudio")]
    InputAudio {
        input_audio: functions::expression::WithExpression<InputAudio>,
    },
    // Counterpart of `RichContentPart::InputVideo`; shares the `video_url`
    // payload field with `VideoUrl` below.
    #[schemars(title = "InputVideo")]
    InputVideo {
        video_url: functions::expression::WithExpression<VideoUrl>,
    },
    // Counterpart of `RichContentPart::VideoUrl`.
    #[schemars(title = "VideoUrl")]
    VideoUrl {
        video_url: functions::expression::WithExpression<VideoUrl>,
    },
    // Counterpart of `RichContentPart::File`.
    #[schemars(title = "File")]
    File {
        file: functions::expression::WithExpression<File>,
    },
}
535
536impl RichContentPartExpression {
537    /// Compiles the expression into a concrete [`RichContentPart`].
538    pub fn compile(
539        self,
540        params: &functions::expression::Params,
541    ) -> Result<RichContentPart, functions::expression::ExpressionError> {
542        match self {
543            RichContentPartExpression::Text { text } => {
544                let text = text.compile_one(params)?;
545                Ok(RichContentPart::Text { text })
546            }
547            RichContentPartExpression::ImageUrl { image_url } => {
548                let image_url = image_url.compile_one(params)?;
549                Ok(RichContentPart::ImageUrl { image_url })
550            }
551            RichContentPartExpression::InputAudio { input_audio } => {
552                let input_audio = input_audio.compile_one(params)?;
553                Ok(RichContentPart::InputAudio { input_audio })
554            }
555            RichContentPartExpression::InputVideo { video_url } => {
556                let video_url = video_url.compile_one(params)?;
557                Ok(RichContentPart::InputVideo { video_url })
558            }
559            RichContentPartExpression::VideoUrl { video_url } => {
560                let video_url = video_url.compile_one(params)?;
561                Ok(RichContentPart::VideoUrl { video_url })
562            }
563            RichContentPartExpression::File { file } => {
564                let file = file.compile_one(params)?;
565                Ok(RichContentPart::File { file })
566            }
567        }
568    }
569}
570
571impl From<RichContentPart> for RichContentPartExpression {
572    fn from(part: RichContentPart) -> Self {
573        match part {
574            RichContentPart::Text { text } => RichContentPartExpression::Text {
575                text: WithExpression::Value(text),
576            },
577            RichContentPart::ImageUrl { image_url } => {
578                RichContentPartExpression::ImageUrl {
579                    image_url: WithExpression::Value(image_url),
580                }
581            }
582            RichContentPart::InputAudio { input_audio } => {
583                RichContentPartExpression::InputAudio {
584                    input_audio: WithExpression::Value(input_audio),
585                }
586            }
587            RichContentPart::InputVideo { video_url } => {
588                RichContentPartExpression::InputVideo {
589                    video_url: WithExpression::Value(video_url),
590                }
591            }
592            RichContentPart::VideoUrl { video_url } => {
593                RichContentPartExpression::VideoUrl {
594                    video_url: WithExpression::Value(video_url),
595                }
596            }
597            RichContentPart::File { file } => RichContentPartExpression::File {
598                file: WithExpression::Value(file),
599            },
600        }
601    }
602}
603
604impl FromStarlarkValue for RichContentPartExpression {
605    fn from_starlark_value(
606        value: &StarlarkValue,
607    ) -> Result<Self, ExpressionError> {
608        let part = RichContentPart::from_starlark_value(value)?;
609        match part {
610            RichContentPart::Text { text } => {
611                Ok(RichContentPartExpression::Text {
612                    text: WithExpression::Value(text),
613                })
614            }
615            RichContentPart::ImageUrl { image_url } => {
616                Ok(RichContentPartExpression::ImageUrl {
617                    image_url: WithExpression::Value(image_url),
618                })
619            }
620            RichContentPart::InputAudio { input_audio } => {
621                Ok(RichContentPartExpression::InputAudio {
622                    input_audio: WithExpression::Value(input_audio),
623                })
624            }
625            RichContentPart::InputVideo { video_url } => {
626                Ok(RichContentPartExpression::InputVideo {
627                    video_url: WithExpression::Value(video_url),
628                })
629            }
630            RichContentPart::VideoUrl { video_url } => {
631                Ok(RichContentPartExpression::VideoUrl {
632                    video_url: WithExpression::Value(video_url),
633                })
634            }
635            RichContentPart::File { file } => {
636                Ok(RichContentPartExpression::File {
637                    file: WithExpression::Value(file),
638                })
639            }
640        }
641    }
642}
643
/// An image URL for multimodal input.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[schemars(rename = "agent.completions.message.ImageUrl")]
pub struct ImageUrl {
    /// The URL of the image (can be a data URL or HTTP URL).
    pub url: String,
    /// The detail level for image processing.
    // Left out of the serialized output entirely when `None`; the schema
    // carries a matching `omitempty` extension.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schemars(extend("omitempty" = true))]
    pub detail: Option<ImageUrlDetail>,
}
655
656impl ImageUrl {
657    /// Prepares the image URL by normalizing the detail field.
658    pub fn prepare(&mut self) {
659        if matches!(self.detail, Some(ImageUrlDetail::Auto)) {
660            self.detail = None;
661        }
662    }
663
664    /// Returns `true` if the URL is empty and no detail is set.
665    pub fn is_empty(&self) -> bool {
666        self.url.is_empty() && self.detail.is_none()
667    }
668
669    /// Returns extractable file content if this is a base64 data URL.
670    ///
671    /// HTTP/HTTPS URLs return `None` (kept inline).
672    pub fn file_content(&self) -> Option<super::FileContent<'_>> {
673        let (mime, payload) = super::file_content::parse_data_url(&self.url)?;
674        Some(super::FileContent {
675            content: payload,
676            extension: super::file_content::mime_to_ext(mime),
677        })
678    }
679}
680
681impl ToStarlarkValue for ImageUrl {
682    fn to_starlark_value<'v>(
683        &self,
684        heap: &'v StarlarkHeap,
685    ) -> StarlarkValue<'v> {
686        heap.alloc(StarlarkAllocDict([
687            ("url", self.url.to_starlark_value(heap)),
688            ("detail", self.detail.to_starlark_value(heap)),
689        ]))
690    }
691}
692
693impl FromStarlarkValue for ImageUrl {
694    fn from_starlark_value(
695        value: &StarlarkValue,
696    ) -> Result<Self, ExpressionError> {
697        let dict = StarlarkDictRef::from_value(*value).ok_or_else(|| {
698            ExpressionError::StarlarkConversionError(
699                "ImageUrl: expected dict".into(),
700            )
701        })?;
702        let mut url = None;
703        let mut detail = None;
704        for (k, v) in dict.iter() {
705            let key = <&str as UnpackValue>::unpack_value(k)
706                .map_err(|e| {
707                    ExpressionError::StarlarkConversionError(e.to_string())
708                })?
709                .ok_or_else(|| {
710                    ExpressionError::StarlarkConversionError(
711                        "ImageUrl: expected string key".into(),
712                    )
713                })?;
714            match key {
715                "url" => url = Some(String::from_starlark_value(&v)?),
716                "detail" => {
717                    detail = Option::<ImageUrlDetail>::from_starlark_value(&v)?
718                }
719                _ => {}
720            }
721            if url.is_some() && detail.is_some() {
722                break;
723            }
724        }
725        Ok(ImageUrl {
726            url: url.ok_or_else(|| {
727                ExpressionError::StarlarkConversionError(
728                    "ImageUrl: missing url".into(),
729                )
730            })?,
731            detail,
732        })
733    }
734}
735
/// Detail level for image processing.
// Each variant is renamed for serde, so values travel as the lowercase
// strings `auto`, `low`, and `high`.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[schemars(rename = "agent.completions.message.ImageUrlDetail")]
pub enum ImageUrlDetail {
    /// Let the model decide the detail level.
    #[schemars(title = "Auto")]
    #[serde(rename = "auto")]
    Auto,
    /// Low detail mode (faster, less tokens).
    #[schemars(title = "Low")]
    #[serde(rename = "low")]
    Low,
    /// High detail mode (more accurate, more tokens).
    #[schemars(title = "High")]
    #[serde(rename = "high")]
    High,
}
753
754impl ToStarlarkValue for ImageUrlDetail {
755    fn to_starlark_value<'v>(
756        &self,
757        heap: &'v StarlarkHeap,
758    ) -> StarlarkValue<'v> {
759        match self {
760            ImageUrlDetail::Auto => "auto".to_starlark_value(heap),
761            ImageUrlDetail::Low => "low".to_starlark_value(heap),
762            ImageUrlDetail::High => "high".to_starlark_value(heap),
763        }
764    }
765}
766
767impl FromStarlarkValue for ImageUrlDetail {
768    fn from_starlark_value(
769        value: &StarlarkValue,
770    ) -> Result<Self, ExpressionError> {
771        let s = <&str as UnpackValue>::unpack_value(*value)
772            .map_err(|e| {
773                ExpressionError::StarlarkConversionError(e.to_string())
774            })?
775            .ok_or_else(|| {
776                ExpressionError::StarlarkConversionError(
777                    "ImageUrlDetail: expected string".into(),
778                )
779            })?;
780        match s {
781            "auto" => Ok(ImageUrlDetail::Auto),
782            "low" => Ok(ImageUrlDetail::Low),
783            "high" => Ok(ImageUrlDetail::High),
784            _ => Err(ExpressionError::StarlarkConversionError(format!(
785                "ImageUrlDetail: unknown value: {}",
786                s
787            ))),
788        }
789    }
790}
791
/// Audio input for multimodal messages.
// Neither field declares a serde default or skip rule, so both are always
// written when serializing and both are expected when deserializing.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
#[schemars(rename = "agent.completions.message.InputAudio")]
pub struct InputAudio {
    /// Base64-encoded audio data.
    pub data: String,
    /// The audio format (e.g., "wav", "mp3").
    pub format: String,
}
801
802impl InputAudio {
803    /// Returns `true` if both data and format are empty.
804    pub fn is_empty(&self) -> bool {
805        self.data.is_empty() && self.format.is_empty()
806    }
807
808    /// Returns extractable file content if audio data is present.
809    ///
810    /// Audio is always base64-encoded inline, so this returns `Some`
811    /// whenever `data` is non-empty.
812    pub fn file_content(&self) -> Option<super::FileContent<'_>> {
813        if self.data.is_empty() {
814            return None;
815        }
816        Some(super::FileContent {
817            content: &self.data,
818            extension: if self.format.is_empty() { "bin" } else { &self.format },
819        })
820    }
821}
822
823impl ToStarlarkValue for InputAudio {
824    fn to_starlark_value<'v>(
825        &self,
826        heap: &'v StarlarkHeap,
827    ) -> StarlarkValue<'v> {
828        heap.alloc(StarlarkAllocDict([
829            ("data", self.data.to_starlark_value(heap)),
830            ("format", self.format.to_starlark_value(heap)),
831        ]))
832    }
833}
834
835impl FromStarlarkValue for InputAudio {
836    fn from_starlark_value(
837        value: &StarlarkValue,
838    ) -> Result<Self, ExpressionError> {
839        let dict = StarlarkDictRef::from_value(*value).ok_or_else(|| {
840            ExpressionError::StarlarkConversionError(
841                "InputAudio: expected dict".into(),
842            )
843        })?;
844        let mut data = None;
845        let mut format = None;
846        for (k, v) in dict.iter() {
847            let key = <&str as UnpackValue>::unpack_value(k)
848                .map_err(|e| {
849                    ExpressionError::StarlarkConversionError(e.to_string())
850                })?
851                .ok_or_else(|| {
852                    ExpressionError::StarlarkConversionError(
853                        "InputAudio: expected string key".into(),
854                    )
855                })?;
856            match key {
857                "data" => data = Some(String::from_starlark_value(&v)?),
858                "format" => format = Some(String::from_starlark_value(&v)?),
859                _ => {}
860            }
861            if data.is_some() && format.is_some() {
862                break;
863            }
864        }
865        Ok(InputAudio {
866            data: data.unwrap_or_default(),
867            format: format.unwrap_or_default(),
868        })
869    }
870}
871
872/// A video URL for multimodal input.
873#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
874#[schemars(rename = "agent.completions.message.VideoUrl")]
875pub struct VideoUrl {
876    /// The URL of the video.
877    pub url: String,
878}
879
880impl VideoUrl {
881    /// Returns `true` if the URL is empty.
882    pub fn is_empty(&self) -> bool {
883        self.url.is_empty()
884    }
885
886    /// Returns extractable file content if this is a base64 data URL.
887    ///
888    /// HTTP/HTTPS URLs return `None` (kept inline).
889    pub fn file_content(&self) -> Option<super::FileContent<'_>> {
890        let (mime, payload) = super::file_content::parse_data_url(&self.url)?;
891        Some(super::FileContent {
892            content: payload,
893            extension: super::file_content::mime_to_ext(mime),
894        })
895    }
896}
897
898impl ToStarlarkValue for VideoUrl {
899    fn to_starlark_value<'v>(
900        &self,
901        heap: &'v StarlarkHeap,
902    ) -> StarlarkValue<'v> {
903        heap.alloc(StarlarkAllocDict([(
904            "url",
905            self.url.to_starlark_value(heap),
906        )]))
907    }
908}
909
910impl FromStarlarkValue for VideoUrl {
911    fn from_starlark_value(
912        value: &StarlarkValue,
913    ) -> Result<Self, ExpressionError> {
914        let dict = StarlarkDictRef::from_value(*value).ok_or_else(|| {
915            ExpressionError::StarlarkConversionError(
916                "VideoUrl: expected dict".into(),
917            )
918        })?;
919        let mut url = None;
920        for (k, v) in dict.iter() {
921            let key = <&str as UnpackValue>::unpack_value(k)
922                .map_err(|e| {
923                    ExpressionError::StarlarkConversionError(e.to_string())
924                })?
925                .ok_or_else(|| {
926                    ExpressionError::StarlarkConversionError(
927                        "VideoUrl: expected string key".into(),
928                    )
929                })?;
930            if key == "url" {
931                url = Some(String::from_starlark_value(&v)?);
932            }
933        }
934        Ok(VideoUrl {
935            url: url.ok_or_else(|| {
936                ExpressionError::StarlarkConversionError(
937                    "VideoUrl: missing url".into(),
938                )
939            })?,
940        })
941    }
942}
943
944/// A file attachment for multimodal input.
945#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema, arbitrary::Arbitrary)]
946#[schemars(rename = "agent.completions.message.File")]
947pub struct File {
948    /// Base64-encoded file data.
949    #[serde(skip_serializing_if = "Option::is_none")]
950    #[schemars(extend("omitempty" = true))]
951    pub file_data: Option<String>,
952    /// The ID of a previously uploaded file.
953    #[serde(skip_serializing_if = "Option::is_none")]
954    #[schemars(extend("omitempty" = true))]
955    pub file_id: Option<String>,
956    /// The filename for display purposes.
957    #[serde(skip_serializing_if = "Option::is_none")]
958    #[schemars(extend("omitempty" = true))]
959    pub filename: Option<String>,
960    /// A URL to fetch the file from.
961    #[serde(skip_serializing_if = "Option::is_none")]
962    #[schemars(extend("omitempty" = true))]
963    pub file_url: Option<String>,
964}
965
966impl File {
967    /// Prepares the file by normalizing empty strings to `None`.
968    pub fn prepare(&mut self) {
969        if self.file_data.as_ref().is_some_and(String::is_empty) {
970            self.file_data = None;
971        }
972        if self.file_id.as_ref().is_some_and(String::is_empty) {
973            self.file_id = None;
974        }
975        if self.filename.as_ref().is_some_and(String::is_empty) {
976            self.filename = None;
977        }
978        if self.file_url.as_ref().is_some_and(String::is_empty) {
979            self.file_url = None;
980        }
981    }
982
983    /// Returns `true` if all file fields are `None`.
984    pub fn is_empty(&self) -> bool {
985        self.file_data.is_none()
986            && self.file_id.is_none()
987            && self.filename.is_none()
988            && self.file_url.is_none()
989    }
990
991    /// Returns extractable file content if inline file data is present.
992    ///
993    /// Files referenced only by URL or ID return `None` (kept inline).
994    pub fn file_content(&self) -> Option<super::FileContent<'_>> {
995        let data = self.file_data.as_deref()?;
996        if data.is_empty() {
997            return None;
998        }
999        let ext = self.filename.as_deref()
1000            .and_then(|name| name.rsplit_once('.'))
1001            .map(|(_, ext)| ext)
1002            .unwrap_or("bin");
1003        Some(super::FileContent {
1004            content: data,
1005            extension: ext,
1006        })
1007    }
1008}
1009
1010impl ToStarlarkValue for File {
1011    fn to_starlark_value<'v>(
1012        &self,
1013        heap: &'v StarlarkHeap,
1014    ) -> StarlarkValue<'v> {
1015        heap.alloc(StarlarkAllocDict([
1016            ("file_data", self.file_data.to_starlark_value(heap)),
1017            ("file_id", self.file_id.to_starlark_value(heap)),
1018            ("filename", self.filename.to_starlark_value(heap)),
1019            ("file_url", self.file_url.to_starlark_value(heap)),
1020        ]))
1021    }
1022}
1023
1024impl FromStarlarkValue for File {
1025    fn from_starlark_value(
1026        value: &StarlarkValue,
1027    ) -> Result<Self, ExpressionError> {
1028        let dict = StarlarkDictRef::from_value(*value).ok_or_else(|| {
1029            ExpressionError::StarlarkConversionError(
1030                "File: expected dict".into(),
1031            )
1032        })?;
1033        let mut file_data = None;
1034        let mut file_id = None;
1035        let mut filename = None;
1036        let mut file_url = None;
1037        for (k, v) in dict.iter() {
1038            let key = <&str as UnpackValue>::unpack_value(k)
1039                .map_err(|e| {
1040                    ExpressionError::StarlarkConversionError(e.to_string())
1041                })?
1042                .ok_or_else(|| {
1043                    ExpressionError::StarlarkConversionError(
1044                        "File: expected string key".into(),
1045                    )
1046                })?;
1047            match key {
1048                "file_data" => {
1049                    file_data = Option::<String>::from_starlark_value(&v)?
1050                }
1051                "file_id" => {
1052                    file_id = Option::<String>::from_starlark_value(&v)?
1053                }
1054                "filename" => {
1055                    filename = Option::<String>::from_starlark_value(&v)?
1056                }
1057                "file_url" => {
1058                    file_url = Option::<String>::from_starlark_value(&v)?
1059                }
1060                _ => {}
1061            }
1062        }
1063        Ok(File {
1064            file_data,
1065            file_id,
1066            filename,
1067            file_url,
1068        })
1069    }
1070}
1071
1072crate::functions::expression::impl_from_special_unsupported!(
1073    RichContentExpression,
1074    RichContentPartExpression,
1075    ImageUrl,
1076    InputAudio,
1077    VideoUrl,
1078    File,
1079);
1080
1081impl crate::functions::expression::FromSpecial
1082    for Vec<crate::functions::expression::WithExpression<RichContentExpression>>
1083{
1084    fn from_special(
1085        _special: &crate::functions::expression::Special,
1086        _params: &crate::functions::expression::Params,
1087    ) -> Result<Self, crate::functions::expression::ExpressionError> {
1088        Err(crate::functions::expression::ExpressionError::UnsupportedSpecial)
1089    }
1090}