Skip to main content

entelix_core/ir/
provider_extensions.rs

1//! `ProviderExtensions` — typed escape hatch for vendor-specific
2//! request knobs that don't generalise to a cross-provider IR field.
3//!
4//! ## Why typed, not `serde_json::Value`
5//!
6//! Letting operators drop arbitrary JSON into a request body would
7//! erode the IR honesty contract: codecs could no longer reason
8//! about what crosses the wire, `LossyEncode` warnings would lose
9//! meaning, and a typo in a vendor-specific key would surface as a
10//! provider error rather than a Rust compile error. Each vendor
11//! gets a `*Ext` struct with concrete fields; codecs read their
12//! own ext when encoding and ignore others (with a `LossyEncode`
13//! warning when an inactive vendor's ext is present — the operator
14//! intended a knob the wire format cannot honour).
15//!
16//! ## Forward compatibility
17//!
18//! Every ext struct is `#[non_exhaustive]`. Adding a new field
19//! (e.g. a vendor ships a new beta knob) is non-breaking — operators
20//! that ignore the field keep working; operators that opt in get
21//! type-checked access via the builder.
22
23use serde::{Deserialize, Serialize};
24
/// Per-provider typed extensions carried alongside a request.
///
/// Defaults to `None` for every vendor, which corresponds to "no
/// overrides — codec uses its vendor default for any field operators
/// didn't set on the IR proper".
///
/// Each slot is read by its own codec only. Per the module docs, a
/// codec that finds another vendor's slot populated ignores it and
/// reports a `LossyEncode` warning.
///
/// Every slot is optional in both serde directions: a `None` slot is
/// left out of the serialized form and an absent key deserializes to
/// `None`, so the default value serializes to `{}` as JSON.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct ProviderExtensions {
    /// Anthropic Messages API knobs. Honoured by
    /// `AnthropicMessagesCodec`; emits `LossyEncode` if set on
    /// requests routed to non-Anthropic codecs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub anthropic: Option<AnthropicExt>,
    /// OpenAI Chat Completions knobs. Honoured by `OpenAiChatCodec`;
    /// other codecs follow the `LossyEncode` rule from the module docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub openai_chat: Option<OpenAiChatExt>,
    /// OpenAI Responses API knobs. Honoured by
    /// `OpenAiResponsesCodec`; other codecs follow the `LossyEncode`
    /// rule from the module docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub openai_responses: Option<OpenAiResponsesExt>,
    /// Gemini Generative Language API knobs. Honoured by
    /// `GeminiCodec`; other codecs follow the `LossyEncode` rule from
    /// the module docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gemini: Option<GeminiExt>,
    /// Bedrock Converse API knobs. Honoured by `BedrockConverseCodec`;
    /// other codecs follow the `LossyEncode` rule from the module docs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub bedrock: Option<BedrockExt>,
}
52
53impl ProviderExtensions {
54    /// Builder-style attach for the Anthropic ext.
55    #[must_use]
56    pub fn with_anthropic(mut self, ext: AnthropicExt) -> Self {
57        self.anthropic = Some(ext);
58        self
59    }
60
61    /// Builder-style attach for the OpenAI Chat ext.
62    #[must_use]
63    pub fn with_openai_chat(mut self, ext: OpenAiChatExt) -> Self {
64        self.openai_chat = Some(ext);
65        self
66    }
67
68    /// Builder-style attach for the OpenAI Responses ext.
69    #[must_use]
70    pub fn with_openai_responses(mut self, ext: OpenAiResponsesExt) -> Self {
71        self.openai_responses = Some(ext);
72        self
73    }
74
75    /// Builder-style attach for the Gemini ext.
76    #[must_use]
77    pub fn with_gemini(mut self, ext: GeminiExt) -> Self {
78        self.gemini = Some(ext);
79        self
80    }
81
82    /// Builder-style attach for the Bedrock ext.
83    #[must_use]
84    pub fn with_bedrock(mut self, ext: BedrockExt) -> Self {
85        self.bedrock = Some(ext);
86        self
87    }
88
89    /// True when no vendor ext is set — codecs short-circuit the
90    /// LossyEncode-warning sweep on this hot path.
91    #[must_use]
92    pub const fn is_empty(&self) -> bool {
93        self.anthropic.is_none()
94            && self.openai_chat.is_none()
95            && self.openai_responses.is_none()
96            && self.gemini.is_none()
97            && self.bedrock.is_none()
98    }
99}
100
/// Anthropic-specific request knobs.
///
/// Each field maps 1:1 to an Anthropic Messages API request knob —
/// a body field or, as with [`betas`](Self::betas), an HTTP header —
/// that has no cross-provider equivalent. Setting one of these on a
/// request routed to a non-Anthropic codec emits a `LossyEncode`
/// warning.
///
/// Construct via [`AnthropicExt::default()`] and the `with_*`
/// chain — `#[non_exhaustive]` closes off struct-literal construction
/// for downstream crates so future field additions are non-breaking
/// (code inside the defining crate is not restricted by it).
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct AnthropicExt {
    /// `anthropic-beta` HTTP header values — comma-joined and sent
    /// as a single header so beta capabilities (extended thinking,
    /// computer-use updates, prompt-caching variants, …) gate at
    /// the transport layer per Anthropic's documented opt-in.
    /// Anthropic-specific (no other vendor exposes a comparable
    /// capability-gating header).
    /// Empty vec means no beta header is sent; an empty vec is also
    /// left out of the serialized form and restored as empty when the
    /// key is absent on deserialize.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub betas: Vec<String>,
}
123
124impl AnthropicExt {
125    /// Replace the `anthropic-beta` opt-in list. Each element rides
126    /// as one comma-separated value in the single `anthropic-beta`
127    /// header the Anthropic Messages API documents.
128    #[must_use]
129    pub fn with_betas<I, S>(mut self, betas: I) -> Self
130    where
131        I: IntoIterator<Item = S>,
132        S: Into<String>,
133    {
134        self.betas = betas.into_iter().map(Into::into).collect();
135        self
136    }
137}
138
/// OpenAI Chat Completions API knobs.
///
/// Both fields are optional; an unset field is left out of the
/// serialized form and an absent key deserializes to `None`. The
/// same two knobs are mirrored on [`OpenAiResponsesExt`].
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct OpenAiChatExt {
    /// `prompt_cache_key` — routing key into OpenAI's auto-cache
    /// bucket. Related requests sharing this key land in the same
    /// cache shard for higher hit rate. OpenAI-specific (Anthropic
    /// per-block `cache_control`, Gemini `cachedContent`, and
    /// Bedrock `cachePoint` are different mechanisms with different
    /// shapes). Mirrored on [`OpenAiResponsesExt::cache_key`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_key: Option<String>,
    /// `service_tier` — cost / latency routing for the request.
    /// `Flex` halves cost in exchange for higher latency, `Priority`
    /// reserves dedicated capacity for SLA-bound workflows, `Scale`
    /// targets sustained high-throughput tenants (see
    /// [`ServiceTier`] for the full variant list). OpenAI-specific
    /// (no other vendor exposes a comparable per-request routing
    /// knob). Mirrored on [`OpenAiResponsesExt::service_tier`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<ServiceTier>,
}
160
161impl OpenAiChatExt {
162    /// Set the prompt-cache routing key.
163    #[must_use]
164    pub fn with_cache_key(mut self, key: impl Into<String>) -> Self {
165        self.cache_key = Some(key.into());
166        self
167    }
168    /// Set the service-tier routing.
169    #[must_use]
170    pub const fn with_service_tier(mut self, tier: ServiceTier) -> Self {
171        self.service_tier = Some(tier);
172        self
173    }
174}
175
/// OpenAI Responses API knobs.
///
/// All fields are optional; an unset field is left out of the
/// serialized form and an absent key deserializes to `None`.
/// `cache_key` and `service_tier` mirror the fields of the same name
/// on [`OpenAiChatExt`]; `reasoning_summary` exists only here.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct OpenAiResponsesExt {
    /// `prompt_cache_key` — routing key into OpenAI's auto-cache
    /// bucket (same semantics as [`OpenAiChatExt::cache_key`]).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_key: Option<String>,
    /// `service_tier` — cost / latency routing (same semantics as
    /// [`OpenAiChatExt::service_tier`]).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<ServiceTier>,
    /// Reasoning summary verbosity for o-series models. When set,
    /// the codec emits `reasoning.summary: "<mode>"` at the
    /// Responses API request root, paired with the cross-vendor
    /// [`crate::ir::ModelRequest::reasoning_effort`] field. The
    /// summary knob is OpenAI-specific (Anthropic / Gemini /
    /// Bedrock have no equivalent) so it stays on this extension
    /// rather than the IR root. See [`ReasoningSummary`] for the
    /// available modes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_summary: Option<ReasoningSummary>,
}
198
199impl OpenAiResponsesExt {
200    /// Set the prompt-cache routing key.
201    #[must_use]
202    pub fn with_cache_key(mut self, key: impl Into<String>) -> Self {
203        self.cache_key = Some(key.into());
204        self
205    }
206    /// Set the service-tier routing.
207    #[must_use]
208    pub const fn with_service_tier(mut self, tier: ServiceTier) -> Self {
209        self.service_tier = Some(tier);
210        self
211    }
212    /// Set the reasoning summary verbosity. Pair with
213    /// [`crate::ir::ModelRequest::reasoning_effort`] (the
214    /// cross-vendor effort knob) — OpenAI Responses emits both as
215    /// `reasoning: { effort, summary }`.
216    #[must_use]
217    pub const fn with_reasoning_summary(mut self, summary: ReasoningSummary) -> Self {
218        self.reasoning_summary = Some(summary);
219        self
220    }
221}
222
/// OpenAI service-tier routing — cost / latency knob shared by both
/// Chat Completions and Responses APIs. OpenAI-specific (no other
/// vendor exposes a comparable per-request routing channel).
///
/// With `rename_all = "snake_case"` each variant serializes as its
/// lowercase name (`"auto"`, `"default"`, `"flex"`, `"priority"`,
/// `"scale"`).
///
/// The enum does not implement the `Default` trait; the
/// [`ServiceTier::Default`] variant is OpenAI's tier *named*
/// "default", not a Rust default value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ServiceTier {
    /// Vendor picks the tier (default behaviour when the field is
    /// omitted; supplying `Auto` makes the choice explicit).
    /// Serialized as `"auto"`.
    Auto,
    /// Standard processing tier — vendor's default capacity pool.
    /// Serialized as `"default"`.
    Default,
    /// Cheaper async tier with relaxed latency SLO (~50% cost cut).
    /// Serialized as `"flex"`.
    Flex,
    /// Reserved-capacity tier for latency-bound enterprise workflows.
    /// Serialized as `"priority"`.
    Priority,
    /// High-throughput sustained-traffic tier.
    /// Serialized as `"scale"`.
    Scale,
}
242
/// Verbosity of the reasoning summary surfaced alongside the reply
/// — OpenAI Responses-only (Anthropic / Gemini / Bedrock have no
/// equivalent knob). Cross-vendor effort levels live on
/// [`crate::ir::ReasoningEffort`].
///
/// With `rename_all = "snake_case"` each variant serializes as its
/// lowercase name (`"auto"`, `"concise"`, `"detailed"`). The enum
/// has no `Default` impl, so "unset" is expressed as `None` on
/// [`OpenAiResponsesExt::reasoning_summary`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum ReasoningSummary {
    /// Vendor picks the verbosity (`auto`).
    Auto,
    /// Concise summary — a few short sentences (`concise`).
    Concise,
    /// Detailed summary — full chain breakdown (`detailed`).
    Detailed,
}
258
/// Gemini Generative Language API knobs.
///
/// All fields are optional: an empty `safety_settings` list and any
/// `None` field are left out of the serialized form, and absent keys
/// deserialize back to empty / `None`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct GeminiExt {
    /// `safetySettings` — per-category harm thresholds. Vendor
    /// canonical names ride through (`HARM_CATEGORY_HATE_SPEECH`,
    /// `HARM_CATEGORY_HARASSMENT`, …). Operators that need broader
    /// coverage than [`crate::ir::SafetyCategory`] use this ext.
    /// Gemini-specific (Anthropic / OpenAI / Bedrock expose
    /// content-moderation through different surfaces — Anthropic
    /// has no per-request override, OpenAI surfaces it as
    /// `moderations` API responses, Bedrock surfaces it as
    /// `BedrockExt::guardrail` with vendor-issued identifiers).
    /// Entries are kept in insertion order; this type neither
    /// validates nor de-duplicates them.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub safety_settings: Vec<GeminiSafetyOverride>,
    /// `candidateCount` — number of independent completions to
    /// generate. Defaults to vendor default (1) when unset.
    /// Gemini-specific (Anthropic / OpenAI / Bedrock require N
    /// separate requests to obtain N completions; only Gemini
    /// batches them into one call). The value is not range-checked
    /// by this type.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub candidate_count: Option<u32>,
    /// `cachedContent` — server-side cached-content reference. The
    /// value is a vendor-minted resource name (e.g.
    /// `cachedContents/<id>`) typically returned by a prior
    /// `cachedContents.create` API call; the wire codec emits it
    /// verbatim. Gemini-specific (Anthropic per-block
    /// `cache_control`, OpenAI `prompt_cache_key`, and Bedrock
    /// `cachePoint` are different mechanisms with different shapes).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cached_content: Option<String>,
    /// `url_context` — opt into Gemini's built-in URL-fetch tool.
    /// When enabled the codec emits `tools[].url_context: {}` so
    /// the model can autonomously fetch and ground on URLs found
    /// in the prompt (up to 20 per request, vendor-side cap).
    /// Gemini-specific (no other vendor exposes a comparable
    /// in-context URL-fetch primitive). `None` means the tool is
    /// not requested; `Some(_)` is the opt-in.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub url_context: Option<UrlContext>,
}
299
300impl GeminiExt {
301    /// Append a safety-category override.
302    #[must_use]
303    pub fn with_safety_override(mut self, category: &str, threshold: &str) -> Self {
304        self.safety_settings.push(GeminiSafetyOverride {
305            category: category.to_owned(),
306            threshold: threshold.to_owned(),
307        });
308        self
309    }
310    /// Set the `candidateCount`.
311    #[must_use]
312    pub const fn with_candidate_count(mut self, n: u32) -> Self {
313        self.candidate_count = Some(n);
314        self
315    }
316    /// Set the server-side cached-content reference.
317    #[must_use]
318    pub fn with_cached_content(mut self, name: impl Into<String>) -> Self {
319        self.cached_content = Some(name.into());
320        self
321    }
322    /// Enable Gemini's built-in `url_context` tool for the request.
323    #[must_use]
324    pub const fn with_url_context(mut self) -> Self {
325        self.url_context = Some(UrlContext::ENABLED);
326        self
327    }
328}
329
330/// Toggle marker for Gemini's built-in `url_context` tool. Constructed
331/// via [`UrlContext::ENABLED`] (or `Default`) — the wire shape is a
332/// parameterless `{}` object, so no runtime fields exist today; the
333/// struct is `#[non_exhaustive]` so a future Gemini release that
334/// surfaces options (max URLs, content filters, …) lands additively.
335#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
336#[non_exhaustive]
337pub struct UrlContext;
338
339impl UrlContext {
340    /// Single canonical instance — the tool is parameterless on the
341    /// wire today, so every enable site shares this value.
342    pub const ENABLED: Self = Self;
343}
344
/// One Gemini safety-category override. Vendor names ride through
/// verbatim — see Gemini API docs for the full list.
///
/// Both fields are plain strings and are required on deserialize
/// (no serde defaults); this type does not check them against the
/// vendor's list of valid names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct GeminiSafetyOverride {
    /// Category name (e.g. `HARM_CATEGORY_HATE_SPEECH`).
    pub category: String,
    /// Threshold name (e.g. `BLOCK_LOW_AND_ABOVE`).
    pub threshold: String,
}
354
/// Bedrock Converse API knobs.
///
/// Both fields are optional; an unset field is left out of the
/// serialized form and an absent key deserializes to `None`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub struct BedrockExt {
    /// Bedrock guardrail to enforce on the request. Carries the
    /// guardrail identifier and version Bedrock issued at
    /// console-create time. Bedrock-specific (Anthropic /
    /// OpenAI / Gemini have no equivalent operator-defined
    /// per-request safety policy reference; safety is either
    /// vendor-managed or surfaced as
    /// [`GeminiExt::safety_settings`] inline overrides).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub guardrail: Option<BedrockGuardrail>,
    /// Performance-tier hint (`standard` / `optimized`). Bedrock
    /// uses this to route to a faster pool for latency-sensitive
    /// workflows where a fraction of model quality is acceptable.
    /// Bedrock-specific (Anthropic / OpenAI / Gemini do not
    /// expose a per-request latency-vs-quality routing knob).
    /// Held as a free-form string: this type does not restrict it
    /// to the values named above.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub performance_config_tier: Option<String>,
}
376
377impl BedrockExt {
378    /// Attach a guardrail.
379    #[must_use]
380    pub fn with_guardrail(mut self, guardrail: BedrockGuardrail) -> Self {
381        self.guardrail = Some(guardrail);
382        self
383    }
384    /// Set the performance-tier hint.
385    #[must_use]
386    pub fn with_performance_config_tier(mut self, tier: impl Into<String>) -> Self {
387        self.performance_config_tier = Some(tier.into());
388        self
389    }
390}
391
/// Bedrock guardrail reference.
///
/// Both fields are plain strings and are required on deserialize
/// (no serde defaults). Unlike the `*Ext` structs this type is not
/// `#[non_exhaustive]`, so it can be built with a struct literal
/// from any crate.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BedrockGuardrail {
    /// Guardrail identifier (UUID-like string AWS issued).
    pub identifier: String,
    /// Guardrail version (e.g. `"DRAFT"`, `"1"`).
    pub version: String,
}
400
#[cfg(test)]
// Tests unwrap freely: a panic on `None` / `Err` is the failure signal.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A defaulted value carries no vendor ext at all.
    #[test]
    fn default_is_empty() {
        assert!(ProviderExtensions::default().is_empty());
    }

    /// Every `with_*` builder lands its ext in the matching slot.
    #[test]
    fn builder_chain_attaches_each_vendor_ext() {
        let anthropic = AnthropicExt::default().with_betas(["thinking-2025"]);
        let openai_chat = OpenAiChatExt::default().with_cache_key("user-42");
        let gemini = GeminiExt {
            candidate_count: Some(2),
            ..Default::default()
        };
        let guardrail = BedrockGuardrail {
            identifier: "abc-123".into(),
            version: "1".into(),
        };
        let bedrock = BedrockExt {
            guardrail: Some(guardrail),
            ..Default::default()
        };

        let ext = ProviderExtensions::default()
            .with_anthropic(anthropic)
            .with_openai_chat(openai_chat)
            .with_gemini(gemini)
            .with_bedrock(bedrock);
        assert!(!ext.is_empty());

        let attached_anthropic = ext.anthropic.as_ref().unwrap();
        assert_eq!(attached_anthropic.betas, vec!["thinking-2025".to_owned()]);

        let attached_chat = ext.openai_chat.as_ref().unwrap();
        assert_eq!(attached_chat.cache_key.as_deref(), Some("user-42"));

        let attached_gemini = ext.gemini.as_ref().unwrap();
        assert_eq!(attached_gemini.candidate_count, Some(2));

        let attached_guardrail = ext.bedrock.as_ref().unwrap().guardrail.as_ref().unwrap();
        assert_eq!(attached_guardrail.identifier, "abc-123");
    }

    /// JSON serialize → deserialize yields an equal value.
    #[test]
    fn provider_extensions_serde_round_trip() {
        let hate_speech = GeminiSafetyOverride {
            category: "HARM_CATEGORY_HATE_SPEECH".into(),
            threshold: "BLOCK_LOW_AND_ABOVE".into(),
        };
        let original = ProviderExtensions::default()
            .with_anthropic(AnthropicExt::default().with_betas(["computer-use-2025"]))
            .with_gemini(GeminiExt {
                safety_settings: vec![hate_speech],
                ..Default::default()
            });

        let json = serde_json::to_string(&original).unwrap();
        let decoded: ProviderExtensions = serde_json::from_str(&json).unwrap();
        assert_eq!(original, decoded);
    }

    /// `skip_serializing_if = "Option::is_none"` keeps the wire small
    /// for the common "no extensions" case: nothing but `{}`.
    #[test]
    fn empty_serialization_omits_inactive_vendor_keys() {
        let json = serde_json::to_string(&ProviderExtensions::default()).unwrap();
        assert_eq!(json, "{}");
    }
}