entelix_core/ir/provider_extensions.rs
1//! `ProviderExtensions` — typed escape hatch for vendor-specific
2//! request knobs that don't generalise to a cross-provider IR field.
3//!
4//! ## Why typed, not `serde_json::Value`
5//!
6//! Letting operators drop arbitrary JSON into a request body would
7//! erode the IR honesty contract: codecs could no longer reason
8//! about what crosses the wire, `LossyEncode` warnings would lose
9//! meaning, and a typo in a vendor-specific key would surface as a
10//! provider error rather than a Rust compile error. Each vendor
11//! gets a `*Ext` struct with concrete fields; codecs read their
12//! own ext when encoding and ignore others (with a `LossyEncode`
13//! warning when an inactive vendor's ext is present — the operator
14//! intended a knob the wire format cannot honour).
15//!
16//! ## Forward compatibility
17//!
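//! Every `*Ext` struct is `#[non_exhaustive]`. Adding a new field
//! (e.g. a vendor ships a new beta knob) is non-breaking — operators
//! that ignore the field keep working; operators that opt in get
//! type-checked access via the builder. The nested payload structs
//! ([`GeminiSafetyOverride`], [`BedrockGuardrail`]) are not
//! `#[non_exhaustive]` and are built with plain struct literals.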
22
23use serde::{Deserialize, Serialize};
24
25/// Per-provider typed extensions. Defaults to `None` for every
26/// vendor, which corresponds to "no overrides — codec uses its
27/// vendor default for any field operators didn't set on the IR
28/// proper".
29#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
30#[non_exhaustive]
31pub struct ProviderExtensions {
32 /// Anthropic Messages API knobs. Honoured by
33 /// `AnthropicMessagesCodec`; emits `LossyEncode` if set on
34 /// requests routed to non-Anthropic codecs.
35 #[serde(default, skip_serializing_if = "Option::is_none")]
36 pub anthropic: Option<AnthropicExt>,
37 /// OpenAI Chat Completions knobs. Honoured by `OpenAiChatCodec`.
38 #[serde(default, skip_serializing_if = "Option::is_none")]
39 pub openai_chat: Option<OpenAiChatExt>,
40 /// OpenAI Responses API knobs. Honoured by
41 /// `OpenAiResponsesCodec`.
42 #[serde(default, skip_serializing_if = "Option::is_none")]
43 pub openai_responses: Option<OpenAiResponsesExt>,
44 /// Gemini Generative Language API knobs. Honoured by
45 /// `GeminiCodec`.
46 #[serde(default, skip_serializing_if = "Option::is_none")]
47 pub gemini: Option<GeminiExt>,
48 /// Bedrock Converse API knobs. Honoured by `BedrockConverseCodec`.
49 #[serde(default, skip_serializing_if = "Option::is_none")]
50 pub bedrock: Option<BedrockExt>,
51}
52
53impl ProviderExtensions {
54 /// Builder-style attach for the Anthropic ext.
55 #[must_use]
56 pub fn with_anthropic(mut self, ext: AnthropicExt) -> Self {
57 self.anthropic = Some(ext);
58 self
59 }
60
61 /// Builder-style attach for the OpenAI Chat ext.
62 #[must_use]
63 pub fn with_openai_chat(mut self, ext: OpenAiChatExt) -> Self {
64 self.openai_chat = Some(ext);
65 self
66 }
67
68 /// Builder-style attach for the OpenAI Responses ext.
69 #[must_use]
70 pub fn with_openai_responses(mut self, ext: OpenAiResponsesExt) -> Self {
71 self.openai_responses = Some(ext);
72 self
73 }
74
75 /// Builder-style attach for the Gemini ext.
76 #[must_use]
77 pub fn with_gemini(mut self, ext: GeminiExt) -> Self {
78 self.gemini = Some(ext);
79 self
80 }
81
82 /// Builder-style attach for the Bedrock ext.
83 #[must_use]
84 pub fn with_bedrock(mut self, ext: BedrockExt) -> Self {
85 self.bedrock = Some(ext);
86 self
87 }
88
89 /// True when no vendor ext is set — codecs short-circuit the
90 /// LossyEncode-warning sweep on this hot path.
91 #[must_use]
92 pub const fn is_empty(&self) -> bool {
93 self.anthropic.is_none()
94 && self.openai_chat.is_none()
95 && self.openai_responses.is_none()
96 && self.gemini.is_none()
97 && self.bedrock.is_none()
98 }
99}
100
101/// Anthropic-specific request knobs.
102///
103/// Each field maps 1:1 to an Anthropic Messages API field that has
104/// no cross-provider equivalent. Setting one of these on a request
105/// routed to a non-Anthropic codec emits a `LossyEncode` warning.
106///
107/// Construct via [`AnthropicExt::default()`] and the `with_*`
108/// chain — direct struct-literal construction is closed off by
109/// `#[non_exhaustive]` so future field additions are non-breaking.
110#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
111#[non_exhaustive]
112pub struct AnthropicExt {
113 /// `anthropic-beta` HTTP header values — comma-joined and sent
114 /// as a single header so beta capabilities (extended thinking,
115 /// computer-use updates, prompt-caching variants, …) gate at
116 /// the transport layer per Anthropic's documented opt-in.
117 /// Anthropic-specific (no other vendor exposes a comparable
118 /// capability-gating header).
119 /// Empty vec means no beta header is sent.
120 #[serde(default, skip_serializing_if = "Vec::is_empty")]
121 pub betas: Vec<String>,
122}
123
124impl AnthropicExt {
125 /// Replace the `anthropic-beta` opt-in list. Each element rides
126 /// as one comma-separated value in the single `anthropic-beta`
127 /// header the Anthropic Messages API documents.
128 #[must_use]
129 pub fn with_betas<I, S>(mut self, betas: I) -> Self
130 where
131 I: IntoIterator<Item = S>,
132 S: Into<String>,
133 {
134 self.betas = betas.into_iter().map(Into::into).collect();
135 self
136 }
137}
138
139/// OpenAI Chat Completions API knobs.
140#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
141#[non_exhaustive]
142pub struct OpenAiChatExt {
143 /// `prompt_cache_key` — routing key into OpenAI's auto-cache
144 /// bucket. Related requests sharing this key land in the same
145 /// cache shard for higher hit rate. OpenAI-specific (Anthropic
146 /// per-block `cache_control`, Gemini `cachedContent`, and
147 /// Bedrock `cachePoint` are different mechanisms with different
148 /// shapes). Mirrored on [`OpenAiResponsesExt::cache_key`].
149 #[serde(default, skip_serializing_if = "Option::is_none")]
150 pub cache_key: Option<String>,
151 /// `service_tier` — cost / latency routing for the request.
152 /// `Flex` halves cost in exchange for higher latency, `Priority`
153 /// reserves dedicated capacity for SLA-bound workflows, `Scale`
154 /// targets sustained high-throughput tenants. OpenAI-specific
155 /// (no other vendor exposes a comparable per-request routing
156 /// knob). Mirrored on [`OpenAiResponsesExt::service_tier`].
157 #[serde(default, skip_serializing_if = "Option::is_none")]
158 pub service_tier: Option<ServiceTier>,
159}
160
161impl OpenAiChatExt {
162 /// Set the prompt-cache routing key.
163 #[must_use]
164 pub fn with_cache_key(mut self, key: impl Into<String>) -> Self {
165 self.cache_key = Some(key.into());
166 self
167 }
168 /// Set the service-tier routing.
169 #[must_use]
170 pub const fn with_service_tier(mut self, tier: ServiceTier) -> Self {
171 self.service_tier = Some(tier);
172 self
173 }
174}
175
176/// OpenAI Responses API knobs.
177#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
178#[non_exhaustive]
179pub struct OpenAiResponsesExt {
180 /// `prompt_cache_key` — routing key into OpenAI's auto-cache
181 /// bucket (same semantics as [`OpenAiChatExt::cache_key`]).
182 #[serde(default, skip_serializing_if = "Option::is_none")]
183 pub cache_key: Option<String>,
184 /// `service_tier` — cost / latency routing (same semantics as
185 /// [`OpenAiChatExt::service_tier`]).
186 #[serde(default, skip_serializing_if = "Option::is_none")]
187 pub service_tier: Option<ServiceTier>,
188 /// Reasoning summary verbosity for o-series models. When set,
189 /// the codec emits `reasoning.summary: "<mode>"` at the
190 /// Responses API request root, paired with the cross-vendor
191 /// [`crate::ir::ModelRequest::reasoning_effort`] field. The
192 /// summary knob is OpenAI-specific (Anthropic / Gemini /
193 /// Bedrock have no equivalent) so it stays on this extension
194 /// rather than the IR root.
195 #[serde(default, skip_serializing_if = "Option::is_none")]
196 pub reasoning_summary: Option<ReasoningSummary>,
197}
198
199impl OpenAiResponsesExt {
200 /// Set the prompt-cache routing key.
201 #[must_use]
202 pub fn with_cache_key(mut self, key: impl Into<String>) -> Self {
203 self.cache_key = Some(key.into());
204 self
205 }
206 /// Set the service-tier routing.
207 #[must_use]
208 pub const fn with_service_tier(mut self, tier: ServiceTier) -> Self {
209 self.service_tier = Some(tier);
210 self
211 }
212 /// Set the reasoning summary verbosity. Pair with
213 /// [`crate::ir::ModelRequest::reasoning_effort`] (the
214 /// cross-vendor effort knob) — OpenAI Responses emits both as
215 /// `reasoning: { effort, summary }`.
216 #[must_use]
217 pub const fn with_reasoning_summary(mut self, summary: ReasoningSummary) -> Self {
218 self.reasoning_summary = Some(summary);
219 self
220 }
221}
222
223/// OpenAI service-tier routing — cost / latency knob shared by both
224/// Chat Completions and Responses APIs. OpenAI-specific (no other
225/// vendor exposes a comparable per-request routing channel).
226#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
227#[serde(rename_all = "snake_case")]
228#[non_exhaustive]
229pub enum ServiceTier {
230 /// Vendor picks the tier (default behaviour when the field is
231 /// omitted; supplying `Auto` makes the choice explicit).
232 Auto,
233 /// Standard processing tier — vendor's default capacity pool.
234 Default,
235 /// Cheaper async tier with relaxed latency SLO (~50% cost cut).
236 Flex,
237 /// Reserved-capacity tier for latency-bound enterprise workflows.
238 Priority,
239 /// High-throughput sustained-traffic tier.
240 Scale,
241}
242
243/// Verbosity of the reasoning summary surfaced alongside the reply
244/// — OpenAI Responses-only (Anthropic / Gemini / Bedrock have no
245/// equivalent knob). Cross-vendor effort levels live on
246/// [`crate::ir::ReasoningEffort`].
247#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
248#[serde(rename_all = "snake_case")]
249#[non_exhaustive]
250pub enum ReasoningSummary {
251 /// Vendor picks the verbosity (`auto`).
252 Auto,
253 /// Concise summary — a few short sentences.
254 Concise,
255 /// Detailed summary — full chain breakdown.
256 Detailed,
257}
258
259/// Gemini Generative Language API knobs.
260#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
261#[non_exhaustive]
262pub struct GeminiExt {
263 /// `safetySettings` — per-category harm thresholds. Vendor
264 /// canonical names ride through (`HARM_CATEGORY_HATE_SPEECH`,
265 /// `HARM_CATEGORY_HARASSMENT`, …). Operators that need broader
266 /// coverage than [`crate::ir::SafetyCategory`] use this ext.
267 /// Gemini-specific (Anthropic / OpenAI / Bedrock expose
268 /// content-moderation through different surfaces — Anthropic
269 /// has no per-request override, OpenAI surfaces it as
270 /// `moderations` API responses, Bedrock surfaces it as
271 /// `BedrockExt::guardrail` with vendor-issued identifiers).
272 #[serde(default, skip_serializing_if = "Vec::is_empty")]
273 pub safety_settings: Vec<GeminiSafetyOverride>,
274 /// `candidateCount` — number of independent completions to
275 /// generate. Defaults to vendor default (1) when unset.
276 /// Gemini-specific (Anthropic / OpenAI / Bedrock require N
277 /// separate requests to obtain N completions; only Gemini
278 /// batches them into one call).
279 #[serde(default, skip_serializing_if = "Option::is_none")]
280 pub candidate_count: Option<u32>,
281 /// `cachedContent` — server-side cached-content reference. The
282 /// value is a vendor-minted resource name (e.g.
283 /// `cachedContents/<id>`) typically returned by a prior
284 /// `cachedContents.create` API call; the wire codec emits it
285 /// verbatim. Gemini-specific (Anthropic per-block
286 /// `cache_control`, OpenAI `prompt_cache_key`, and Bedrock
287 /// `cachePoint` are different mechanisms with different shapes).
288 #[serde(default, skip_serializing_if = "Option::is_none")]
289 pub cached_content: Option<String>,
290 /// `url_context` — opt into Gemini's built-in URL-fetch tool.
291 /// When enabled the codec emits `tools[].url_context: {}` so
292 /// the model can autonomously fetch and ground on URLs found
293 /// in the prompt (up to 20 per request, vendor-side cap).
294 /// Gemini-specific (no other vendor exposes a comparable
295 /// in-context URL-fetch primitive).
296 #[serde(default, skip_serializing_if = "Option::is_none")]
297 pub url_context: Option<UrlContext>,
298}
299
300impl GeminiExt {
301 /// Append a safety-category override.
302 #[must_use]
303 pub fn with_safety_override(mut self, category: &str, threshold: &str) -> Self {
304 self.safety_settings.push(GeminiSafetyOverride {
305 category: category.to_owned(),
306 threshold: threshold.to_owned(),
307 });
308 self
309 }
310 /// Set the `candidateCount`.
311 #[must_use]
312 pub const fn with_candidate_count(mut self, n: u32) -> Self {
313 self.candidate_count = Some(n);
314 self
315 }
316 /// Set the server-side cached-content reference.
317 #[must_use]
318 pub fn with_cached_content(mut self, name: impl Into<String>) -> Self {
319 self.cached_content = Some(name.into());
320 self
321 }
322 /// Enable Gemini's built-in `url_context` tool for the request.
323 #[must_use]
324 pub const fn with_url_context(mut self) -> Self {
325 self.url_context = Some(UrlContext::ENABLED);
326 self
327 }
328}
329
330/// Toggle marker for Gemini's built-in `url_context` tool. Constructed
331/// via [`UrlContext::ENABLED`] (or `Default`) — the wire shape is a
332/// parameterless `{}` object, so no runtime fields exist today; the
333/// struct is `#[non_exhaustive]` so a future Gemini release that
334/// surfaces options (max URLs, content filters, …) lands additively.
335#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
336#[non_exhaustive]
337pub struct UrlContext;
338
339impl UrlContext {
340 /// Single canonical instance — the tool is parameterless on the
341 /// wire today, so every enable site shares this value.
342 pub const ENABLED: Self = Self;
343}
344
345/// One Gemini safety-category override. Vendor names ride through
346/// verbatim — see Gemini API docs for the full list.
347#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
348pub struct GeminiSafetyOverride {
349 /// Category name (e.g. `HARM_CATEGORY_HATE_SPEECH`).
350 pub category: String,
351 /// Threshold name (e.g. `BLOCK_LOW_AND_ABOVE`).
352 pub threshold: String,
353}
354
355/// Bedrock Converse API knobs.
356#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
357#[non_exhaustive]
358pub struct BedrockExt {
359 /// Bedrock guardrail to enforce on the request. Carries the
360 /// guardrail identifier and version Bedrock issued at
361 /// console-create time. Bedrock-specific (Anthropic /
362 /// OpenAI / Gemini have no equivalent operator-defined
363 /// per-request safety policy reference; safety is either
364 /// vendor-managed or surfaced as
365 /// [`GeminiExt::safety_settings`] inline overrides).
366 #[serde(default, skip_serializing_if = "Option::is_none")]
367 pub guardrail: Option<BedrockGuardrail>,
368 /// Performance-tier hint (`standard` / `optimized`). Bedrock
369 /// uses this to route to a faster pool for latency-sensitive
370 /// workflows where a fraction of model quality is acceptable.
371 /// Bedrock-specific (Anthropic / OpenAI / Gemini do not
372 /// expose a per-request latency-vs-quality routing knob).
373 #[serde(default, skip_serializing_if = "Option::is_none")]
374 pub performance_config_tier: Option<String>,
375}
376
377impl BedrockExt {
378 /// Attach a guardrail.
379 #[must_use]
380 pub fn with_guardrail(mut self, guardrail: BedrockGuardrail) -> Self {
381 self.guardrail = Some(guardrail);
382 self
383 }
384 /// Set the performance-tier hint.
385 #[must_use]
386 pub fn with_performance_config_tier(mut self, tier: impl Into<String>) -> Self {
387 self.performance_config_tier = Some(tier.into());
388 self
389 }
390}
391
392/// Bedrock guardrail reference.
393#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
394pub struct BedrockGuardrail {
395 /// Guardrail identifier (UUID-like string AWS issued).
396 pub identifier: String,
397 /// Guardrail version (e.g. `"DRAFT"`, `"1"`).
398 pub version: String,
399}
400
// Unit tests for the extension containers: emptiness, builder wiring
// for every vendor slot, serde round-trips, and the snake_case enum
// encoding.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    #[test]
    fn default_is_empty() {
        let ext = ProviderExtensions::default();
        assert!(ext.is_empty());
    }

    #[test]
    fn any_single_vendor_ext_makes_it_non_empty() {
        let base = ProviderExtensions::default;
        assert!(!base().with_anthropic(AnthropicExt::default()).is_empty());
        assert!(!base().with_openai_chat(OpenAiChatExt::default()).is_empty());
        assert!(!base()
            .with_openai_responses(OpenAiResponsesExt::default())
            .is_empty());
        assert!(!base().with_gemini(GeminiExt::default()).is_empty());
        assert!(!base().with_bedrock(BedrockExt::default()).is_empty());
    }

    #[test]
    fn builder_chain_attaches_each_vendor_ext() {
        let ext = ProviderExtensions::default()
            .with_anthropic(AnthropicExt::default().with_betas(["thinking-2025"]))
            .with_openai_chat(OpenAiChatExt::default().with_cache_key("user-42"))
            .with_openai_responses(
                OpenAiResponsesExt::default().with_reasoning_summary(ReasoningSummary::Detailed),
            )
            .with_gemini(GeminiExt::default().with_candidate_count(2))
            .with_bedrock(BedrockExt::default().with_guardrail(BedrockGuardrail {
                identifier: "abc-123".into(),
                version: "1".into(),
            }));
        assert!(!ext.is_empty());
        assert_eq!(
            ext.anthropic.as_ref().unwrap().betas,
            vec!["thinking-2025".to_owned()]
        );
        assert_eq!(
            ext.openai_chat.as_ref().unwrap().cache_key.as_deref(),
            Some("user-42")
        );
        assert_eq!(
            ext.openai_responses.as_ref().unwrap().reasoning_summary,
            Some(ReasoningSummary::Detailed)
        );
        assert_eq!(ext.gemini.as_ref().unwrap().candidate_count, Some(2));
        assert_eq!(
            ext.bedrock
                .as_ref()
                .unwrap()
                .guardrail
                .as_ref()
                .unwrap()
                .identifier,
            "abc-123"
        );
    }

    #[test]
    fn provider_extensions_serde_round_trip() {
        // Built through the `with_*` chain so the test keeps compiling
        // when a new field is added to `GeminiExt`.
        let ext = ProviderExtensions::default()
            .with_anthropic(AnthropicExt::default().with_betas(["computer-use-2025"]))
            .with_gemini(
                GeminiExt::default()
                    .with_safety_override("HARM_CATEGORY_HATE_SPEECH", "BLOCK_LOW_AND_ABOVE"),
            );
        let s = serde_json::to_string(&ext).unwrap();
        let back: ProviderExtensions = serde_json::from_str(&s).unwrap();
        assert_eq!(ext, back);
    }

    #[test]
    fn openai_exts_serde_round_trip() {
        let ext = ProviderExtensions::default()
            .with_openai_chat(
                OpenAiChatExt::default()
                    .with_cache_key("user-42")
                    .with_service_tier(ServiceTier::Priority),
            )
            .with_openai_responses(
                OpenAiResponsesExt::default()
                    .with_cache_key("tenant-7")
                    .with_service_tier(ServiceTier::Flex)
                    .with_reasoning_summary(ReasoningSummary::Concise),
            );
        let s = serde_json::to_string(&ext).unwrap();
        let back: ProviderExtensions = serde_json::from_str(&s).unwrap();
        assert_eq!(ext, back);
    }

    #[test]
    fn enums_serialize_as_snake_case() {
        assert_eq!(serde_json::to_string(&ServiceTier::Flex).unwrap(), "\"flex\"");
        assert_eq!(
            serde_json::to_string(&ServiceTier::Default).unwrap(),
            "\"default\""
        );
        assert_eq!(
            serde_json::to_string(&ReasoningSummary::Concise).unwrap(),
            "\"concise\""
        );
    }

    #[test]
    fn empty_serialization_omits_inactive_vendor_keys() {
        // `skip_serializing_if = "Option::is_none"` keeps the wire
        // small for the common "no extensions" case.
        let ext = ProviderExtensions::default();
        let s = serde_json::to_string(&ext).unwrap();
        assert_eq!(s, "{}");
    }
}
476}