Skip to main content

entelix_core/ir/
content.rs

//! `ContentPart` — a single block within a [`Message`](crate::ir::Message).
//!
//! Provider-neutral. When a vendor lacks support for a variant, the
//! codec emits a [`ModelWarning::LossyEncode`](crate::ir::ModelWarning)
//! rather than silently dropping the block (invariant 6).
6
7use serde::{Deserialize, Serialize};
8
9use crate::ir::cache::CacheControl;
10use crate::ir::provider_echo::ProviderEchoSnapshot;
11use crate::ir::source::{CitationSource, MediaSource};
12
/// One block of content inside a [`Message`](crate::ir::Message).
///
/// The enum is `#[non_exhaustive]` so future variants don't break user
/// `match` arms. New modalities or capability blocks land here as
/// additional variants — codecs reach 100% IR coverage by either
/// emitting native wire shape or a `LossyEncode` warning for each.
///
/// Serialized as an internally tagged enum: the `type` key carries
/// the variant name in `snake_case` (`text`, `tool_use`,
/// `redacted_thinking`, …).
///
/// Every variant except `RedactedThinking`, `ToolUse`, `ImageOutput`
/// and `AudioOutput` carries an `Option<CacheControl>` field —
/// operators mark a block as cached and codecs that support
/// per-block caching (Anthropic, Bedrock-on-Anthropic) emit the
/// directive natively. Other codecs emit `LossyEncode`. The four
/// variants without the field are model output produced fresh each
/// turn, so there is nothing to cache.
///
/// Every variant also carries
/// `provider_echoes: Vec<ProviderEchoSnapshot>` — vendor-keyed opaque
/// round-trip tokens this part must echo back on the next turn
/// (Gemini 3.x `thought_signature`, Anthropic `signature`, OpenAI
/// Responses `encrypted_content`, …). Defaults to empty and is
/// omitted from serialized output while empty. Codecs only read /
/// write entries matching their own `Codec::name`. See
/// [`ProviderEchoSnapshot`] for the cross-vendor design.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[non_exhaustive]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentPart {
    /// Plain UTF-8 text — the primary medium.
    Text {
        /// The text payload.
        text: String,
        /// Per-block cache directive. Omitted from serialized output
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens (Gemini emits
        /// `thought_signature` on `text` parts in reasoning turns).
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// An image input.
    Image {
        /// Where the image bytes live.
        source: MediaSource,
        /// Per-block cache directive. Omitted from serialized output
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// An audio input.
    Audio {
        /// Where the audio bytes live.
        source: MediaSource,
        /// Per-block cache directive. Omitted from serialized output
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// A video input.
    Video {
        /// Where the video bytes live.
        source: MediaSource,
        /// Per-block cache directive. Omitted from serialized output
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// A document input (PDF, plain-text file, etc.).
    Document {
        /// Where the document bytes live.
        source: MediaSource,
        /// Display name shown to the model (e.g. `"contract.pdf"`).
        /// Optional — codecs that require a name supply a stable
        /// derivation when absent.
        ///
        /// NOTE(review): unlike the other optional fields, this one
        /// has no `skip_serializing_if`, so a `None` name is written
        /// out explicitly rather than omitted — confirm that is the
        /// intended wire shape.
        name: Option<String>,
        /// Per-block cache directive. Omitted from serialized output
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// A reasoning / extended-thinking block produced by the assistant
    /// before the user-facing reply.
    ///
    /// Surfaced as its own variant (rather than mixed with `Text`) so
    /// recipes can show / hide / cache reasoning independently. Order
    /// relative to `Text` parts is preserved — vendors that rely on
    /// chain-of-thought integrity (Anthropic thinking, Gemini 3.x
    /// `thought_signature`) require the original block order on
    /// follow-up turns.
    ///
    /// Vendor opaque tokens (Anthropic `signature`, Gemini
    /// `thought_signature`, OpenAI Responses reasoning-item
    /// `encrypted_content`) ride on `provider_echoes`.
    Thinking {
        /// The reasoning text.
        text: String,
        /// Per-block cache directive. Although this variant is
        /// assistant output, it does carry a cache slot so reasoning
        /// can be cached independently of `Text`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// A reasoning block the safety system flagged for redaction.
    /// Carries no harness-readable text — the entire block is an
    /// opaque round-trip artifact preserved in `provider_echoes`.
    ///
    /// Emitted by Anthropic Claude 3.7 Sonnet only; Claude 4.x and
    /// later do not produce this variant. Codecs that don't recognise
    /// it on encode emit `LossyEncode` (invariant 6 — the prior
    /// silent-drop is replaced by a typed channel).
    RedactedThinking {
        /// Vendor opaque round-trip tokens. Anthropic emits
        /// `{ "data": "<base64>" }` here under provider key
        /// `"anthropic-messages"`.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// A grounded citation produced by the assistant — the `snippet`
    /// is the verbatim cited text; `source` describes provenance.
    Citation {
        /// The cited substring as it appears in the assistant's reply.
        snippet: String,
        /// Where the snippet came from.
        source: CitationSource,
        /// Per-block cache directive. Omitted from serialized output
        /// when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// A tool call emitted by the assistant. The harness dispatches it.
    /// Tool calls are model output — they do not carry a cache
    /// directive (the model emits each call afresh).
    ///
    /// Vendor opaque tokens (Gemini 3.x `thought_signature` on
    /// `functionCall` parts, OpenAI Responses `function_call.id`)
    /// ride on `provider_echoes`. Missing the Gemini token on the
    /// first `functionCall` of a step yields HTTP 400 on the next
    /// turn — codecs MUST round-trip it.
    ToolUse {
        /// Stable ID matched against the corresponding `ToolResult`.
        id: String,
        /// Tool name to dispatch (must exist in the active `ToolRegistry`).
        name: String,
        /// JSON arguments for the tool — must validate against the
        /// tool's `input_schema` before dispatch.
        input: serde_json::Value,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// An image the assistant produced (vendor-managed image
    /// generation: OpenAI `image_generation_call.result`, Gemini
    /// `inline_data` parts on the model output, …). Distinct from
    /// [`Self::Image`] which is a user-supplied input.
    ///
    /// Output blocks have no cache directive — they are produced
    /// fresh per turn.
    ImageOutput {
        /// Where the produced image bytes live. Most vendors return
        /// inline base64 ([`MediaSource::Base64`]); some return a
        /// hosted URL.
        source: MediaSource,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// Audio the assistant produced (text-to-speech reply). Distinct
    /// from [`Self::Audio`] which is a user-supplied input. Like the
    /// other output-only variants it has no cache directive.
    AudioOutput {
        /// Where the produced audio bytes live.
        source: MediaSource,
        /// Optional textual transcript the vendor returned alongside
        /// the audio. Surfaced separately so callers can route
        /// transcript text through the operator's logging channel
        /// without re-decoding the audio.
        ///
        /// NOTE(review): no `skip_serializing_if` here either, so a
        /// `None` transcript is written out explicitly — confirm
        /// that is the intended wire shape.
        transcript: Option<String>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },

    /// The harness's reply to a previous `ToolUse` call.
    ///
    /// Both `tool_use_id` and `name` are carried because providers
    /// disagree on which one keys correlation: Anthropic / OpenAI /
    /// Bedrock use the id (`tool_use_id` / `tool_call_id` /
    /// `toolUseId`), while Gemini's `functionResponse` keys by
    /// `name`. Carrying both keeps the IR provider-neutral —
    /// codecs use whichever their wire format requires without
    /// needing the agent harness to know.
    ToolResult {
        /// The originating `ToolUse::id`.
        tool_use_id: String,
        /// The originating `ToolUse::name`. Required for Gemini's
        /// `functionResponse` wire shape; ignored by codecs that
        /// correlate purely by id.
        name: String,
        /// Result payload — either a string or structured data.
        content: ToolResultContent,
        /// True if the tool reported a failure. Defaults to `false`
        /// when the key is absent from serialized input.
        #[serde(default)]
        is_error: bool,
        /// Per-block cache directive. Tool result blocks
        /// often carry the heaviest payloads; caching them across
        /// turns is the canonical RAG-cache pattern.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
        /// Vendor opaque round-trip tokens.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        provider_echoes: Vec<ProviderEchoSnapshot>,
    },
}
244
245impl ContentPart {
246    /// Build a text part from anything string-like.
247    #[must_use]
248    pub fn text(text: impl Into<String>) -> Self {
249        Self::Text {
250            text: text.into(),
251            cache_control: None,
252            provider_echoes: Vec::new(),
253        }
254    }
255
256    /// Build an image part from a media source.
257    #[must_use]
258    pub fn image(source: MediaSource) -> Self {
259        Self::Image {
260            source,
261            cache_control: None,
262            provider_echoes: Vec::new(),
263        }
264    }
265
266    /// Build an audio part from a media source.
267    #[must_use]
268    pub fn audio(source: MediaSource) -> Self {
269        Self::Audio {
270            source,
271            cache_control: None,
272            provider_echoes: Vec::new(),
273        }
274    }
275
276    /// Build a video part from a media source.
277    #[must_use]
278    pub fn video(source: MediaSource) -> Self {
279        Self::Video {
280            source,
281            cache_control: None,
282            provider_echoes: Vec::new(),
283        }
284    }
285
286    /// Build a document part from a media source.
287    #[must_use]
288    pub fn document(source: MediaSource, name: Option<String>) -> Self {
289        Self::Document {
290            source,
291            name,
292            cache_control: None,
293            provider_echoes: Vec::new(),
294        }
295    }
296
297    /// Build a thinking part with no opaque tokens. Codecs attach
298    /// vendor opaque tokens via [`Self::with_provider_echo`].
299    #[must_use]
300    pub fn thinking(text: impl Into<String>) -> Self {
301        Self::Thinking {
302            text: text.into(),
303            cache_control: None,
304            provider_echoes: Vec::new(),
305        }
306    }
307
308    /// Build a redacted-thinking part. Anthropic Claude 3.7 Sonnet
309    /// emits these for safety-flagged reasoning; the codec attaches
310    /// the opaque `data` payload via [`Self::with_provider_echo`].
311    #[must_use]
312    pub fn redacted_thinking() -> Self {
313        Self::RedactedThinking {
314            provider_echoes: Vec::new(),
315        }
316    }
317
318    /// Borrow the cache directive on this part, when any is set.
319    /// Returns `None` for assistant-output variants (`ToolUse`,
320    /// `ImageOutput`, `AudioOutput`, `RedactedThinking`) — they are
321    /// produced fresh per turn and have nothing to cache.
322    #[must_use]
323    pub const fn cache_control(&self) -> Option<&CacheControl> {
324        match self {
325            Self::Text { cache_control, .. }
326            | Self::Image { cache_control, .. }
327            | Self::Audio { cache_control, .. }
328            | Self::Video { cache_control, .. }
329            | Self::Document { cache_control, .. }
330            | Self::Thinking { cache_control, .. }
331            | Self::Citation { cache_control, .. }
332            | Self::ToolResult { cache_control, .. } => cache_control.as_ref(),
333            Self::ToolUse { .. }
334            | Self::ImageOutput { .. }
335            | Self::AudioOutput { .. }
336            | Self::RedactedThinking { .. } => None,
337        }
338    }
339
340    /// Borrow this part's vendor opaque round-trip tokens. Codecs use
341    /// this to recover their own blob (matched by `Codec::name`) on
342    /// the encode side.
343    #[must_use]
344    pub fn provider_echoes(&self) -> &[ProviderEchoSnapshot] {
345        match self {
346            Self::Text {
347                provider_echoes, ..
348            }
349            | Self::Image {
350                provider_echoes, ..
351            }
352            | Self::Audio {
353                provider_echoes, ..
354            }
355            | Self::Video {
356                provider_echoes, ..
357            }
358            | Self::Document {
359                provider_echoes, ..
360            }
361            | Self::Thinking {
362                provider_echoes, ..
363            }
364            | Self::RedactedThinking { provider_echoes }
365            | Self::Citation {
366                provider_echoes, ..
367            }
368            | Self::ToolUse {
369                provider_echoes, ..
370            }
371            | Self::ImageOutput {
372                provider_echoes, ..
373            }
374            | Self::AudioOutput {
375                provider_echoes, ..
376            }
377            | Self::ToolResult {
378                provider_echoes, ..
379            } => provider_echoes,
380        }
381    }
382
383    /// Attach (or clear) a cache directive on this part. Returns the
384    /// value back so callers can chain. No-op on `ToolUse` /
385    /// `ImageOutput` / `AudioOutput` / `RedactedThinking` (model
386    /// output never carries caching) — the directive is silently
387    /// dropped.
388    #[must_use]
389    pub fn with_cache_control(self, cache: CacheControl) -> Self {
390        match self {
391            Self::Text {
392                text,
393                provider_echoes,
394                ..
395            } => Self::Text {
396                text,
397                cache_control: Some(cache),
398                provider_echoes,
399            },
400            Self::Image {
401                source,
402                provider_echoes,
403                ..
404            } => Self::Image {
405                source,
406                cache_control: Some(cache),
407                provider_echoes,
408            },
409            Self::Audio {
410                source,
411                provider_echoes,
412                ..
413            } => Self::Audio {
414                source,
415                cache_control: Some(cache),
416                provider_echoes,
417            },
418            Self::Video {
419                source,
420                provider_echoes,
421                ..
422            } => Self::Video {
423                source,
424                cache_control: Some(cache),
425                provider_echoes,
426            },
427            Self::Document {
428                source,
429                name,
430                provider_echoes,
431                ..
432            } => Self::Document {
433                source,
434                name,
435                cache_control: Some(cache),
436                provider_echoes,
437            },
438            Self::Thinking {
439                text,
440                provider_echoes,
441                ..
442            } => Self::Thinking {
443                text,
444                cache_control: Some(cache),
445                provider_echoes,
446            },
447            Self::Citation {
448                snippet,
449                source,
450                provider_echoes,
451                ..
452            } => Self::Citation {
453                snippet,
454                source,
455                cache_control: Some(cache),
456                provider_echoes,
457            },
458            Self::ToolResult {
459                tool_use_id,
460                name,
461                content,
462                is_error,
463                provider_echoes,
464                ..
465            } => Self::ToolResult {
466                tool_use_id,
467                name,
468                content,
469                is_error,
470                cache_control: Some(cache),
471                provider_echoes,
472            },
473            other @ (Self::ToolUse { .. }
474            | Self::ImageOutput { .. }
475            | Self::AudioOutput { .. }
476            | Self::RedactedThinking { .. }) => other,
477        }
478    }
479
480    /// Append a vendor opaque round-trip token to this part. Codecs
481    /// call this on the decode side after extracting the wire-shape
482    /// signature / encrypted_content / data field. Returns the value
483    /// back so callers can chain.
484    #[must_use]
485    pub fn with_provider_echo(mut self, echo: ProviderEchoSnapshot) -> Self {
486        match &mut self {
487            Self::Text {
488                provider_echoes, ..
489            }
490            | Self::Image {
491                provider_echoes, ..
492            }
493            | Self::Audio {
494                provider_echoes, ..
495            }
496            | Self::Video {
497                provider_echoes, ..
498            }
499            | Self::Document {
500                provider_echoes, ..
501            }
502            | Self::Thinking {
503                provider_echoes, ..
504            }
505            | Self::RedactedThinking { provider_echoes }
506            | Self::Citation {
507                provider_echoes, ..
508            }
509            | Self::ToolUse {
510                provider_echoes, ..
511            }
512            | Self::ImageOutput {
513                provider_echoes, ..
514            }
515            | Self::AudioOutput {
516                provider_echoes, ..
517            }
518            | Self::ToolResult {
519                provider_echoes, ..
520            } => provider_echoes.push(echo),
521        }
522        self
523    }
524}
525
/// Payload of a [`ContentPart::ToolResult`].
///
/// Serialized untagged: the value appears on the wire with no
/// variant wrapper. On deserialization serde tries the variants in
/// declaration order, so a bare string always becomes
/// [`Self::Text`] and anything else falls through to [`Self::Json`].
/// As a consequence a `Json` holding a JSON string does not
/// round-trip — it comes back as `Text`. Do not reorder the variants.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(untagged)]
#[non_exhaustive]
pub enum ToolResultContent {
    /// Plain text result — the most common shape.
    Text(String),
    /// Structured JSON result. Codecs that lack structured-result support
    /// stringify and emit a `LossyEncode` warning.
    Json(serde_json::Value),
}