Skip to main content

vtcode_core/llm/provider/
request.rs

1use crate::config::types::{ReasoningEffortLevel, VerbosityLevel};
2use serde::{Deserialize, Serialize};
3use serde_json::{Value, json};
4use std::sync::Arc;
5
6use super::{Message, ToolDefinition};
7
/// Fallback model configuration for Anthropic server-side fallback.
/// Used with the `server-side-fallback-2026-06-01` beta header.
///
/// Only `model` is always serialized; the optional overrides are left out of
/// the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FallbackModel {
    /// The model identifier to fall back to (e.g., "claude-opus-4-8")
    pub model: String,
    /// Optional max_tokens override for this fallback attempt.
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
    /// Optional thinking configuration override for this fallback attempt.
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub thinking: Option<AnthropicThinkingConfig>,
}
21
/// Anthropic thinking configuration for fallback models.
///
/// Serialized as an internally tagged object: the variant name is written in
/// lowercase under the `type` key, next to the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum AnthropicThinkingConfig {
    /// Thinking is turned off. This is the `Default` variant.
    #[default]
    Disabled,
    /// Thinking is enabled with an explicit token budget.
    Enabled {
        /// Token budget for thinking.
        budget_tokens: u32,
        /// Optional display setting; omitted from the output when `None`.
        /// NOTE(review): accepted values are not visible in this file — confirm
        /// against the provider code that builds this value.
        #[serde(skip_serializing_if = "Option::is_none")]
        display: Option<String>,
    },
    /// Adaptive thinking; carries no budget field.
    Adaptive {
        /// Optional display setting; omitted from the output when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        display: Option<String>,
    },
}
38
/// Request-scoped prompt cache profile (see `LLMRequest::prompt_cache_profile`).
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PromptCacheProfile {
    /// Serialized as `budget_continuation`.
    /// NOTE(review): the cache behavior this profile selects is decided by the
    /// provider implementation, which is not visible in this file.
    BudgetContinuation,
}
44
/// Per-request override for the Anthropic thinking mode.
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicThinkingModeOverride {
    /// No override (the `Default` variant).
    /// Presumably the provider's own default thinking mode applies — confirm at the use site.
    #[default]
    Inherit,
    /// Override selecting disabled thinking.
    Disabled,
    /// Override selecting adaptive thinking.
    Adaptive,
    /// Override selecting a manual thinking budget; the payload is the budget value.
    ManualBudget(u32),
}
54
/// Per-request override for how Anthropic thinking output is displayed.
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicThinkingDisplayOverride {
    /// No override (the `Default` variant).
    #[default]
    Inherit,
    /// Override selecting the summarized display.
    Summarized,
    /// Override selecting the omitted display.
    Omitted,
}
63
/// Three-state override for an optional string request parameter.
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicOptionalStringOverride {
    /// No override (the `Default` variant).
    #[default]
    Inherit,
    /// Presumably leaves the parameter out of the request entirely — confirm at the use site.
    Omit,
    /// Use exactly the contained string.
    Explicit(String),
}
72
/// Three-state override for an optional `u32` request parameter.
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicOptionalU32Override {
    /// No override (the `Default` variant).
    #[default]
    Inherit,
    /// Presumably leaves the parameter out of the request entirely — confirm at the use site.
    Omit,
    /// Use exactly the contained value.
    Explicit(u32),
}
81
/// Bundle of Anthropic-specific request overrides.
///
/// Every field is marked `#[serde(default)]`, so a field missing from the
/// input deserializes to its type's `Default` value (the `Inherit` variant).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct AnthropicRequestOverrides {
    /// Override for the thinking mode.
    #[serde(default)]
    pub thinking_mode: AnthropicThinkingModeOverride,
    /// Override for the thinking display setting.
    #[serde(default)]
    pub thinking_display: AnthropicThinkingDisplayOverride,
    /// Override for the `effort` string parameter.
    #[serde(default)]
    pub effort: AnthropicOptionalStringOverride,
    /// Override for the task budget, expressed in tokens.
    #[serde(default)]
    pub task_budget_tokens: AnthropicOptionalU32Override,
}
93
/// Universal LLM request structure
///
/// A single provider-agnostic description of a request. Several fields only
/// apply to specific providers, as noted on each field. The derived `Default`
/// yields empty collections/strings, `None` for every optional field and
/// `false` for the boolean flags.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]

pub struct LLMRequest {
    /// Conversation messages carried by the request.
    pub messages: Vec<Message>,
    /// Optional system prompt, held behind an `Arc`.
    pub system_prompt: Option<Arc<String>>,
    /// Optional tool definitions, held behind an `Arc`.
    pub tools: Option<Arc<Vec<ToolDefinition>>>,
    /// Model identifier.
    pub model: String,
    /// Optional `max_tokens` value for the request.
    pub max_tokens: Option<u32>,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Streaming flag for the request.
    pub stream: bool,

    /// Optional structured output JSON schema to request from providers that support it
    /// For Anthropic this will be sent as `output_config.format = { type: "json_schema", schema: ... }`
    pub output_format: Option<Value>,

    /// Tool choice configuration based on official API docs
    /// Supports: "auto" (default), "none", "any", or specific tool selection
    pub tool_choice: Option<ToolChoice>,

    /// Whether to enable parallel tool calls (OpenAI specific)
    pub parallel_tool_calls: Option<bool>,

    /// Parallel tool use configuration following Anthropic best practices
    pub parallel_tool_config: Option<Box<ParallelToolConfig>>,

    /// Reasoning effort level for models that support it (none, minimal, low, medium, high, xhigh)
    /// Applies to: Claude, GPT-5 family, Gemini, Qwen3, DeepSeek with reasoning capability
    pub reasoning_effort: Option<ReasoningEffortLevel>,

    /// Effort level for overall token usage (low, medium, high, xhigh, max)
    /// Applies to: Anthropic adaptive-thinking models such as Claude Opus 4.7
    /// Controls how many tokens Claude uses when responding, trading off between
    /// response thoroughness and token efficiency.
    pub effort: Option<String>,

    /// Verbosity level for output text (low, medium, high)
    /// Applies to: GPT-5.4-family Responses workflows and other models that support verbosity control
    pub verbosity: Option<VerbosityLevel>,

    /// Advanced generation parameters
    pub do_sample: Option<bool>,
    /// Optional `top_p` sampling value.
    pub top_p: Option<f32>,
    /// Optional `top_k` sampling value.
    pub top_k: Option<i32>,
    /// Optional presence penalty.
    pub presence_penalty: Option<f32>,
    /// Optional frequency penalty.
    pub frequency_penalty: Option<f32>,
    /// Optional list of stop sequences.
    pub stop_sequences: Option<Vec<String>>,
    /// Optional budget for extended thinking (Anthropic specific)
    /// Minimum value: 1024
    pub thinking_budget: Option<u32>,

    /// Optional beta headers for Anthropic (and potentially others)
    pub betas: Option<Vec<String>>,

    /// Optional provider-specific context management configuration (Anthropic compaction/editing).
    pub context_management: Option<Value>,

    /// Optional prefill text for the assistant response (Anthropic prefilling)
    /// Incompatible with extended thinking
    pub prefill: Option<String>,

    /// Whether to enable character reinforcement (system prompt/prefill tagging)
    pub character_reinforcement: bool,

    /// Optional character name for reinforcement
    pub character_name: Option<String>,

    /// Optional coding agent specific settings
    pub coding_agent_settings: Option<Box<CodingAgentSettings>>,

    /// Optional turn metadata for git context (remote URLs, commit hash, etc.)
    /// This is sent as X-Turn-Metadata header to providers that support it
    pub metadata: Option<Value>,

    /// Optional Responses API continuity pointer for server-side context chaining.
    /// Used by providers that support stateful response continuation.
    pub previous_response_id: Option<String>,

    /// Optional Responses API storage flag.
    /// When set, providers that support `store` pass this through directly.
    pub response_store: Option<bool>,

    /// Optional Responses API include fields (e.g. reasoning encrypted content).
    /// Passed through only for providers/APIs that support include selectors.
    pub responses_include: Option<Vec<String>>,

    /// Optional native OpenAI `service_tier` request parameter.
    /// Passed through only for native OpenAI endpoints that support service tiers.
    pub service_tier: Option<String>,

    /// Optional provider routing hint for prompt cache stickiness.
    /// OpenAI uses this value to improve routing locality for repeated prefixes.
    pub prompt_cache_key: Option<String>,

    /// Optional request-scoped prompt cache profile for provider-specific TTL overrides.
    pub prompt_cache_profile: Option<PromptCacheProfile>,

    /// Optional fallback models for Anthropic server-side fallback (Claude Fable 5).
    /// Requires the `server-side-fallback-2026-06-01` beta header.
    pub fallbacks: Option<Vec<FallbackModel>>,

    /// Optional opaque credit token from a refused request's `stop_details.fallback_credit_token`.
    /// Echoed on the retry to avoid paying the prompt-cache cost twice.
    /// Requires the `fallback-credit-2026-06-01` beta header on both the refused request and
    /// the retry.
    pub fallback_credit_token: Option<String>,

    /// Optional Anthropic-specific request overrides used when request semantics must
    /// not inherit VT Code's provider defaults, such as the Anthropic compatibility server.
    pub anthropic_request_overrides: Option<AnthropicRequestOverrides>,
}
205
/// Optional overrides for standalone Responses compaction requests.
///
/// Every field is an `Option`; the derived `Default` leaves all of them `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ResponsesCompactionOptions {
    /// Optional custom instructions appended to the derived replay instructions.
    pub instructions: Option<String>,
    /// Optional output token limit for the compaction response.
    pub max_output_tokens: Option<u32>,
    /// Optional reasoning effort override for the compaction pass.
    pub reasoning_effort: Option<ReasoningEffortLevel>,
    /// Optional verbosity override for the compaction output text settings.
    pub verbosity: Option<VerbosityLevel>,
    /// Optional include selectors override.
    pub responses_include: Option<Vec<String>>,
    /// Optional storage override.
    pub response_store: Option<bool>,
    /// Optional native OpenAI service tier override.
    pub service_tier: Option<String>,
    /// Optional prompt cache routing override.
    pub prompt_cache_key: Option<String>,
}
226
/// Settings to refine model behavior for coding agent tasks
///
/// With the derived `Default`, every boolean flag is `false` and
/// `role_specialization` is `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CodingAgentSettings {
    /// Encourage the model to use XML tags for structured responses
    pub force_xml_tags: bool,
    /// Automatically prefill with `<thought>` to encourage reasoning
    pub prefill_thought: bool,
    /// Explicitly allow the model to say "I don't know" or "I am unsure"
    pub allow_uncertainty: bool,
    /// Enforce strict grounding to provided documents
    pub strict_grounding: bool,
    /// Optimize for long context by hoisting large messages and grounding in quotes
    pub long_context_optimization: bool,
    /// Wrap multiple file contexts in structured XML tags
    pub use_xml_document_format: bool,
    /// Inject instructions to find quotes before carrying out the task
    pub force_quote_grounding: bool,
    /// Optional specialized role for Claude (e.g., "Senior Software Architect")
    pub role_specialization: Option<String>,
    /// Enforce the use of `<thinking>` and `<answer>` tags for manual chain-of-thought
    pub enforce_structured_thought: bool,
}
249
250/// Tool choice configuration that works across different providers
251/// Based on OpenAI, Anthropic, and Gemini API specifications
252/// Follows Anthropic's tool use best practices for optimal performance
253#[derive(Debug, Clone, Serialize, Deserialize)]
254#[serde(untagged)]
255#[derive(Default)]
256pub enum ToolChoice {
257    /// Let the model decide whether to call tools ("auto")
258    /// Default behavior - allows model to use tools when appropriate
259    #[default]
260    Auto,
261
262    /// Force the model to not call any tools ("none")
263    /// Useful for pure conversational responses without tool usage
264    None,
265
266    /// Force the model to call at least one tool ("any")
267    /// Ensures tool usage even when model might prefer direct response
268    Any,
269
270    /// Force the model to call a specific tool
271    /// Useful for directing model to use particular functionality
272    Specific(SpecificToolChoice),
273}
274
/// Specific tool choice for forcing a particular function call
///
/// `ToolChoice::function` builds this with `tool_type` set to `"function"`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificToolChoice {
    /// Kind of tool being selected; serialized under the key `type`.
    #[serde(rename = "type")]
    pub tool_type: String, // "function"

    /// The function the model is required to call.
    pub function: SpecificFunctionChoice,
}
283
/// Specific function choice details
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificFunctionChoice {
    /// Name of the function to call.
    pub name: String,
}
289
290impl ToolChoice {
291    /// Create auto tool choice (default behavior)
292    pub fn auto() -> Self {
293        Self::Auto
294    }
295
296    /// Create none tool choice (disable tool calling)
297    pub fn none() -> Self {
298        Self::None
299    }
300
301    /// Create any tool choice (force at least one tool call)
302    pub fn any() -> Self {
303        Self::Any
304    }
305
306    /// Create specific function tool choice
307    pub fn function(name: String) -> Self {
308        Self::Specific(SpecificToolChoice {
309            tool_type: "function".to_owned(),
310            function: SpecificFunctionChoice { name },
311        })
312    }
313
314    /// Check if this tool choice allows parallel tool use
315    /// Based on Anthropic's parallel tool use guidelines
316    pub fn allows_parallel_tools(&self) -> bool {
317        match self {
318            // Auto allows parallel tools by default
319            Self::Auto => true,
320            // Any forces at least one tool, may allow parallel
321            Self::Any => true,
322            // Specific forces one particular tool, typically no parallel
323            Self::Specific(_) => false,
324            // None disables tools entirely
325            Self::None => false,
326        }
327    }
328
329    /// Get human-readable description of tool choice behavior
330    pub fn description(&self) -> &'static str {
331        match self {
332            Self::Auto => "Model decides when to use tools (allows parallel)",
333            Self::None => "No tools will be used",
334            Self::Any => "At least one tool must be used (allows parallel)",
335            Self::Specific(_) => "Specific tool must be used (no parallel)",
336        }
337    }
338
339    /// OpenAI-compatible providers that share the same tool_choice format
340    const OPENAI_STYLE_PROVIDERS: &'static [&'static str] = &[
341        "openai",
342        "deepseek",
343        "huggingface",
344        "mistral",
345        "openrouter",
346        "zai",
347        "moonshot",
348        "stepfun",
349        "evolink",
350        "lmstudio",
351        "llamacpp",
352    ];
353
354    /// Convert to provider-specific format
355    #[inline]
356    pub fn to_provider_format(&self, provider: &str) -> Value {
357        if Self::OPENAI_STYLE_PROVIDERS.contains(&provider) {
358            return self.to_openai_format();
359        }
360
361        match provider {
362            "anthropic" => self.to_anthropic_format(),
363            "gemini" => self.to_gemini_format(),
364            _ => self.to_openai_format(), // Default to OpenAI format
365        }
366    }
367
368    #[inline]
369    fn to_openai_format(&self) -> Value {
370        match self {
371            Self::Auto => json!("auto"),
372            Self::None => json!("none"),
373            Self::Any => json!("required"),
374            Self::Specific(choice) => json!(choice),
375        }
376    }
377
378    #[inline]
379    fn to_anthropic_format(&self) -> Value {
380        match self {
381            Self::Auto => json!({"type": "auto"}),
382            Self::None => json!({"type": "none"}),
383            Self::Any => json!({"type": "any"}),
384            Self::Specific(choice) => json!({"type": "tool", "name": &choice.function.name}),
385        }
386    }
387
388    #[inline]
389    fn to_gemini_format(&self) -> Value {
390        match self {
391            Self::Auto => json!({"mode": "auto"}),
392            Self::None => json!({"mode": "none"}),
393            Self::Any => json!({"mode": "any"}),
394            Self::Specific(choice) => {
395                json!({"mode": "any", "allowed_function_names": [&choice.function.name]})
396            }
397        }
398    }
399}
400
/// Configuration for parallel tool use behavior
/// Based on Anthropic's parallel tool use guidelines
///
/// The `Default` implementation allows parallel use, caps it at 5 tools and
/// encourages it in prompts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelToolConfig {
    /// Whether to disable parallel tool use
    /// When true, forces sequential tool execution
    pub disable_parallel_tool_use: bool,

    /// Maximum number of tools to execute in parallel
    /// None means no limit (provider default)
    pub max_parallel_tools: Option<usize>,

    /// Whether to encourage parallel tool use in prompts
    pub encourage_parallel: bool,
}
416
417impl Default for ParallelToolConfig {
418    fn default() -> Self {
419        Self {
420            disable_parallel_tool_use: false,
421            max_parallel_tools: Some(5), // Reasonable default
422            encourage_parallel: true,
423        }
424    }
425}
426
427impl ParallelToolConfig {
428    /// Create configuration optimized for Anthropic models
429    pub fn anthropic_optimized() -> Self {
430        Self {
431            disable_parallel_tool_use: false,
432            max_parallel_tools: None, // Let Anthropic decide
433            encourage_parallel: true,
434        }
435    }
436
437    /// Create configuration for sequential tool use
438    pub fn sequential_only() -> Self {
439        Self {
440            disable_parallel_tool_use: true,
441            max_parallel_tools: Some(1),
442            encourage_parallel: false,
443        }
444    }
445}