entelix_core/ir/content.rs
1//! `ContentPart` — a single block within a [`Message`](crate::ir::Message).
2//!
3//! Provider-neutral. Vendors that lack support for a variant cause the
4//! codec to emit a [`ModelWarning::LossyEncode`](crate::ir::ModelWarning)
5//! rather than failing silently (invariant 6).
6
7use serde::{Deserialize, Serialize};
8
9use crate::ir::cache::CacheControl;
10use crate::ir::provider_echo::ProviderEchoSnapshot;
11use crate::ir::source::{CitationSource, MediaSource};
12
13/// One block of content inside a [`Message`](crate::ir::Message).
14///
15/// The enum is `#[non_exhaustive]` so future variants don't break user
16/// `match` arms. New modalities or capability blocks land here as
17/// additional variants — codecs reach 100% IR coverage by either
18/// emitting native wire shape or a `LossyEncode` warning for each.
19///
20/// Every input-side variant carries an
21/// `Option<CacheControl>` field — operators mark a
22/// block as cached and codecs that support per-block caching
23/// (Anthropic, Bedrock-on-Anthropic) emit the directive natively.
24/// Other codecs emit `LossyEncode`. The `ToolUse` variant — the
25/// assistant's outbound call — does not carry caching: the model
26/// emits it, there is nothing to cache.
27///
28/// Every variant also carries
29/// `provider_echoes: Vec<ProviderEchoSnapshot>` — vendor-keyed opaque
30/// round-trip tokens this part must echo back on the next turn
31/// (Gemini 3.x `thought_signature`, Anthropic `signature`, OpenAI
32/// Responses `encrypted_content`, …). Defaults to empty. Codecs
33/// only read / write entries matching their own `Codec::name`. See
34/// [`ProviderEchoSnapshot`] for the cross-vendor design.
35#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
36#[non_exhaustive]
37#[serde(tag = "type", rename_all = "snake_case")]
38pub enum ContentPart {
39 /// Plain UTF-8 text — the primary medium.
40 Text {
41 /// The text payload.
42 text: String,
43 /// Per-block cache directive.
44 #[serde(default, skip_serializing_if = "Option::is_none")]
45 cache_control: Option<CacheControl>,
46 /// Vendor opaque round-trip tokens (Gemini emits
47 /// `thought_signature` on `text` parts in reasoning turns).
48 #[serde(default, skip_serializing_if = "Vec::is_empty")]
49 provider_echoes: Vec<ProviderEchoSnapshot>,
50 },
51
52 /// An image input.
53 Image {
54 /// Where the image bytes live.
55 source: MediaSource,
56 /// Per-block cache directive.
57 #[serde(default, skip_serializing_if = "Option::is_none")]
58 cache_control: Option<CacheControl>,
59 /// Vendor opaque round-trip tokens.
60 #[serde(default, skip_serializing_if = "Vec::is_empty")]
61 provider_echoes: Vec<ProviderEchoSnapshot>,
62 },
63
64 /// An audio input.
65 Audio {
66 /// Where the audio bytes live.
67 source: MediaSource,
68 /// Per-block cache directive.
69 #[serde(default, skip_serializing_if = "Option::is_none")]
70 cache_control: Option<CacheControl>,
71 /// Vendor opaque round-trip tokens.
72 #[serde(default, skip_serializing_if = "Vec::is_empty")]
73 provider_echoes: Vec<ProviderEchoSnapshot>,
74 },
75
76 /// A video input.
77 Video {
78 /// Where the video bytes live.
79 source: MediaSource,
80 /// Per-block cache directive.
81 #[serde(default, skip_serializing_if = "Option::is_none")]
82 cache_control: Option<CacheControl>,
83 /// Vendor opaque round-trip tokens.
84 #[serde(default, skip_serializing_if = "Vec::is_empty")]
85 provider_echoes: Vec<ProviderEchoSnapshot>,
86 },
87
88 /// A document input (PDF, plain-text file, etc.).
89 Document {
90 /// Where the document bytes live.
91 source: MediaSource,
92 /// Display name shown to the model (e.g. `"contract.pdf"`).
93 /// Optional — codecs that require a name supply a stable
94 /// derivation when absent.
95 name: Option<String>,
96 /// Per-block cache directive.
97 #[serde(default, skip_serializing_if = "Option::is_none")]
98 cache_control: Option<CacheControl>,
99 /// Vendor opaque round-trip tokens.
100 #[serde(default, skip_serializing_if = "Vec::is_empty")]
101 provider_echoes: Vec<ProviderEchoSnapshot>,
102 },
103
104 /// A reasoning / extended-thinking block produced by the assistant
105 /// before the user-facing reply.
106 ///
107 /// Surfaced as its own variant (rather than mixed with `Text`) so
108 /// recipes can show / hide / cache reasoning independently. Order
109 /// relative to `Text` parts is preserved — vendors that rely on
110 /// chain-of-thought integrity (Anthropic thinking, Gemini 3.x
111 /// `thought_signature`) require the original block order on
112 /// follow-up turns.
113 ///
114 /// Vendor opaque tokens (Anthropic `signature`, Gemini
115 /// `thought_signature`, OpenAI Responses reasoning-item
116 /// `encrypted_content`) ride on `provider_echoes`.
117 Thinking {
118 /// The reasoning text.
119 text: String,
120 /// Per-block cache directive.
121 #[serde(default, skip_serializing_if = "Option::is_none")]
122 cache_control: Option<CacheControl>,
123 /// Vendor opaque round-trip tokens.
124 #[serde(default, skip_serializing_if = "Vec::is_empty")]
125 provider_echoes: Vec<ProviderEchoSnapshot>,
126 },
127
128 /// A reasoning block the safety system flagged for redaction.
129 /// Carries no harness-readable text — the entire block is an
130 /// opaque round-trip artifact preserved in `provider_echoes`.
131 ///
132 /// Emitted by Anthropic Claude 3.7 Sonnet only; Claude 4.x and
133 /// later do not produce this variant. Codecs that don't recognise
134 /// it on encode emit `LossyEncode` (invariant 6 — the prior
135 /// silent-drop is replaced by a typed channel).
136 RedactedThinking {
137 /// Vendor opaque round-trip tokens. Anthropic emits
138 /// `{ "data": "<base64>" }` here under provider key
139 /// `"anthropic-messages"`.
140 #[serde(default, skip_serializing_if = "Vec::is_empty")]
141 provider_echoes: Vec<ProviderEchoSnapshot>,
142 },
143
144 /// A grounded citation produced by the assistant — the `snippet`
145 /// is the verbatim cited text; `source` describes provenance.
146 Citation {
147 /// The cited substring as it appears in the assistant's reply.
148 snippet: String,
149 /// Where the snippet came from.
150 source: CitationSource,
151 /// Per-block cache directive.
152 #[serde(default, skip_serializing_if = "Option::is_none")]
153 cache_control: Option<CacheControl>,
154 /// Vendor opaque round-trip tokens.
155 #[serde(default, skip_serializing_if = "Vec::is_empty")]
156 provider_echoes: Vec<ProviderEchoSnapshot>,
157 },
158
159 /// A tool call emitted by the assistant. The harness dispatches it.
160 /// Tool calls are model output — they do not carry a cache
161 /// directive (the model emits each call afresh).
162 ///
163 /// Vendor opaque tokens (Gemini 3.x `thought_signature` on
164 /// `functionCall` parts, OpenAI Responses `function_call.id`)
165 /// ride on `provider_echoes`. Missing the Gemini token on the
166 /// first `functionCall` of a step yields HTTP 400 on the next
167 /// turn — codecs MUST round-trip it.
168 ToolUse {
169 /// Stable ID matched against the corresponding `ToolResult`.
170 id: String,
171 /// Tool name to dispatch (must exist in the active `ToolRegistry`).
172 name: String,
173 /// JSON arguments for the tool — must validate against the
174 /// tool's `input_schema` before dispatch.
175 input: serde_json::Value,
176 /// Vendor opaque round-trip tokens.
177 #[serde(default, skip_serializing_if = "Vec::is_empty")]
178 provider_echoes: Vec<ProviderEchoSnapshot>,
179 },
180
181 /// An image the assistant produced (vendor-managed image
182 /// generation: OpenAI `image_generation_call.result`, Gemini
183 /// `inline_data` parts on the model output, …). Distinct from
184 /// [`Self::Image`] which is a user-supplied input.
185 ///
186 /// Output blocks have no cache directive — they are produced
187 /// fresh per turn.
188 ImageOutput {
189 /// Where the produced image bytes live. Most vendors return
190 /// inline base64 ([`MediaSource::Base64`]); some return a
191 /// hosted URL.
192 source: MediaSource,
193 /// Vendor opaque round-trip tokens.
194 #[serde(default, skip_serializing_if = "Vec::is_empty")]
195 provider_echoes: Vec<ProviderEchoSnapshot>,
196 },
197
198 /// Audio the assistant produced (text-to-speech reply). Distinct
199 /// from [`Self::Audio`] which is a user-supplied input.
200 AudioOutput {
201 /// Where the produced audio bytes live.
202 source: MediaSource,
203 /// Optional textual transcript the vendor returned alongside
204 /// the audio. Surfaced separately so callers can route
205 /// transcript text through the operator's logging channel
206 /// without re-decoding the audio.
207 transcript: Option<String>,
208 /// Vendor opaque round-trip tokens.
209 #[serde(default, skip_serializing_if = "Vec::is_empty")]
210 provider_echoes: Vec<ProviderEchoSnapshot>,
211 },
212
213 /// The harness's reply to a previous `ToolUse` call.
214 ///
215 /// Both `tool_use_id` and `name` are carried because providers
216 /// disagree on which one keys correlation: Anthropic / OpenAI /
217 /// Bedrock use the id (`tool_use_id` / `tool_call_id` /
218 /// `toolUseId`), while Gemini's `functionResponse` keys by
219 /// `name`. Carrying both keeps the IR provider-neutral —
220 /// codecs use whichever their wire format requires without
221 /// needing the agent harness to know.
222 ToolResult {
223 /// The originating `ToolUse::id`.
224 tool_use_id: String,
225 /// The originating `ToolUse::name`. Required for Gemini's
226 /// `functionResponse` wire shape; ignored by codecs that
227 /// correlate purely by id.
228 name: String,
229 /// Result payload — either a string or structured data.
230 content: ToolResultContent,
231 /// True if the tool reported a failure.
232 #[serde(default)]
233 is_error: bool,
234 /// Per-block cache directive. Tool result blocks
235 /// often carry the heaviest payloads; caching them across
236 /// turns is the canonical RAG-cache pattern.
237 #[serde(default, skip_serializing_if = "Option::is_none")]
238 cache_control: Option<CacheControl>,
239 /// Vendor opaque round-trip tokens.
240 #[serde(default, skip_serializing_if = "Vec::is_empty")]
241 provider_echoes: Vec<ProviderEchoSnapshot>,
242 },
243}
244
245impl ContentPart {
246 /// Build a text part from anything string-like.
247 #[must_use]
248 pub fn text(text: impl Into<String>) -> Self {
249 Self::Text {
250 text: text.into(),
251 cache_control: None,
252 provider_echoes: Vec::new(),
253 }
254 }
255
256 /// Build an image part from a media source.
257 #[must_use]
258 pub fn image(source: MediaSource) -> Self {
259 Self::Image {
260 source,
261 cache_control: None,
262 provider_echoes: Vec::new(),
263 }
264 }
265
266 /// Build an audio part from a media source.
267 #[must_use]
268 pub fn audio(source: MediaSource) -> Self {
269 Self::Audio {
270 source,
271 cache_control: None,
272 provider_echoes: Vec::new(),
273 }
274 }
275
276 /// Build a video part from a media source.
277 #[must_use]
278 pub fn video(source: MediaSource) -> Self {
279 Self::Video {
280 source,
281 cache_control: None,
282 provider_echoes: Vec::new(),
283 }
284 }
285
286 /// Build a document part from a media source.
287 #[must_use]
288 pub fn document(source: MediaSource, name: Option<String>) -> Self {
289 Self::Document {
290 source,
291 name,
292 cache_control: None,
293 provider_echoes: Vec::new(),
294 }
295 }
296
297 /// Build a thinking part with no opaque tokens. Codecs attach
298 /// vendor opaque tokens via [`Self::with_provider_echo`].
299 #[must_use]
300 pub fn thinking(text: impl Into<String>) -> Self {
301 Self::Thinking {
302 text: text.into(),
303 cache_control: None,
304 provider_echoes: Vec::new(),
305 }
306 }
307
308 /// Build a redacted-thinking part. Anthropic Claude 3.7 Sonnet
309 /// emits these for safety-flagged reasoning; the codec attaches
310 /// the opaque `data` payload via [`Self::with_provider_echo`].
311 #[must_use]
312 pub fn redacted_thinking() -> Self {
313 Self::RedactedThinking {
314 provider_echoes: Vec::new(),
315 }
316 }
317
318 /// Borrow the cache directive on this part, when any is set.
319 /// Returns `None` for assistant-output variants (`ToolUse`,
320 /// `ImageOutput`, `AudioOutput`, `RedactedThinking`) — they are
321 /// produced fresh per turn and have nothing to cache.
322 #[must_use]
323 pub const fn cache_control(&self) -> Option<&CacheControl> {
324 match self {
325 Self::Text { cache_control, .. }
326 | Self::Image { cache_control, .. }
327 | Self::Audio { cache_control, .. }
328 | Self::Video { cache_control, .. }
329 | Self::Document { cache_control, .. }
330 | Self::Thinking { cache_control, .. }
331 | Self::Citation { cache_control, .. }
332 | Self::ToolResult { cache_control, .. } => cache_control.as_ref(),
333 Self::ToolUse { .. }
334 | Self::ImageOutput { .. }
335 | Self::AudioOutput { .. }
336 | Self::RedactedThinking { .. } => None,
337 }
338 }
339
340 /// Borrow this part's vendor opaque round-trip tokens. Codecs use
341 /// this to recover their own blob (matched by `Codec::name`) on
342 /// the encode side.
343 #[must_use]
344 pub fn provider_echoes(&self) -> &[ProviderEchoSnapshot] {
345 match self {
346 Self::Text {
347 provider_echoes, ..
348 }
349 | Self::Image {
350 provider_echoes, ..
351 }
352 | Self::Audio {
353 provider_echoes, ..
354 }
355 | Self::Video {
356 provider_echoes, ..
357 }
358 | Self::Document {
359 provider_echoes, ..
360 }
361 | Self::Thinking {
362 provider_echoes, ..
363 }
364 | Self::RedactedThinking { provider_echoes }
365 | Self::Citation {
366 provider_echoes, ..
367 }
368 | Self::ToolUse {
369 provider_echoes, ..
370 }
371 | Self::ImageOutput {
372 provider_echoes, ..
373 }
374 | Self::AudioOutput {
375 provider_echoes, ..
376 }
377 | Self::ToolResult {
378 provider_echoes, ..
379 } => provider_echoes,
380 }
381 }
382
383 /// Attach (or clear) a cache directive on this part. Returns the
384 /// value back so callers can chain. No-op on `ToolUse` /
385 /// `ImageOutput` / `AudioOutput` / `RedactedThinking` (model
386 /// output never carries caching) — the directive is silently
387 /// dropped.
388 #[must_use]
389 pub fn with_cache_control(self, cache: CacheControl) -> Self {
390 match self {
391 Self::Text {
392 text,
393 provider_echoes,
394 ..
395 } => Self::Text {
396 text,
397 cache_control: Some(cache),
398 provider_echoes,
399 },
400 Self::Image {
401 source,
402 provider_echoes,
403 ..
404 } => Self::Image {
405 source,
406 cache_control: Some(cache),
407 provider_echoes,
408 },
409 Self::Audio {
410 source,
411 provider_echoes,
412 ..
413 } => Self::Audio {
414 source,
415 cache_control: Some(cache),
416 provider_echoes,
417 },
418 Self::Video {
419 source,
420 provider_echoes,
421 ..
422 } => Self::Video {
423 source,
424 cache_control: Some(cache),
425 provider_echoes,
426 },
427 Self::Document {
428 source,
429 name,
430 provider_echoes,
431 ..
432 } => Self::Document {
433 source,
434 name,
435 cache_control: Some(cache),
436 provider_echoes,
437 },
438 Self::Thinking {
439 text,
440 provider_echoes,
441 ..
442 } => Self::Thinking {
443 text,
444 cache_control: Some(cache),
445 provider_echoes,
446 },
447 Self::Citation {
448 snippet,
449 source,
450 provider_echoes,
451 ..
452 } => Self::Citation {
453 snippet,
454 source,
455 cache_control: Some(cache),
456 provider_echoes,
457 },
458 Self::ToolResult {
459 tool_use_id,
460 name,
461 content,
462 is_error,
463 provider_echoes,
464 ..
465 } => Self::ToolResult {
466 tool_use_id,
467 name,
468 content,
469 is_error,
470 cache_control: Some(cache),
471 provider_echoes,
472 },
473 other @ (Self::ToolUse { .. }
474 | Self::ImageOutput { .. }
475 | Self::AudioOutput { .. }
476 | Self::RedactedThinking { .. }) => other,
477 }
478 }
479
480 /// Append a vendor opaque round-trip token to this part. Codecs
481 /// call this on the decode side after extracting the wire-shape
482 /// signature / encrypted_content / data field. Returns the value
483 /// back so callers can chain.
484 #[must_use]
485 pub fn with_provider_echo(mut self, echo: ProviderEchoSnapshot) -> Self {
486 match &mut self {
487 Self::Text {
488 provider_echoes, ..
489 }
490 | Self::Image {
491 provider_echoes, ..
492 }
493 | Self::Audio {
494 provider_echoes, ..
495 }
496 | Self::Video {
497 provider_echoes, ..
498 }
499 | Self::Document {
500 provider_echoes, ..
501 }
502 | Self::Thinking {
503 provider_echoes, ..
504 }
505 | Self::RedactedThinking { provider_echoes }
506 | Self::Citation {
507 provider_echoes, ..
508 }
509 | Self::ToolUse {
510 provider_echoes, ..
511 }
512 | Self::ImageOutput {
513 provider_echoes, ..
514 }
515 | Self::AudioOutput {
516 provider_echoes, ..
517 }
518 | Self::ToolResult {
519 provider_echoes, ..
520 } => provider_echoes.push(echo),
521 }
522 self
523 }
524}
525
526/// Payload of a [`ContentPart::ToolResult`].
527#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
528#[serde(untagged)]
529#[non_exhaustive]
530pub enum ToolResultContent {
531 /// Plain text result — the most common shape.
532 Text(String),
533 /// Structured JSON result. Codecs that lack structured-result support
534 /// stringify and emit a `LossyEncode` warning.
535 Json(serde_json::Value),
536}