// vtcode_core/llm/provider/request.rs

1use crate::config::types::{ReasoningEffortLevel, VerbosityLevel};
2use serde::{Deserialize, Serialize};
3use serde_json::{Value, json};
4use std::sync::Arc;
5
6use super::{Message, ToolDefinition};
7
8#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
9#[serde(rename_all = "snake_case")]
10pub enum PromptCacheProfile {
11    BudgetContinuation,
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
15#[serde(rename_all = "snake_case")]
16pub enum AnthropicThinkingModeOverride {
17    #[default]
18    Inherit,
19    Disabled,
20    Adaptive,
21    ManualBudget(u32),
22}
23
24#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
25#[serde(rename_all = "snake_case")]
26pub enum AnthropicThinkingDisplayOverride {
27    #[default]
28    Inherit,
29    Summarized,
30    Omitted,
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
34#[serde(rename_all = "snake_case")]
35pub enum AnthropicOptionalStringOverride {
36    #[default]
37    Inherit,
38    Omit,
39    Explicit(String),
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
43#[serde(rename_all = "snake_case")]
44pub enum AnthropicOptionalU32Override {
45    #[default]
46    Inherit,
47    Omit,
48    Explicit(u32),
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
52pub struct AnthropicRequestOverrides {
53    #[serde(default)]
54    pub thinking_mode: AnthropicThinkingModeOverride,
55    #[serde(default)]
56    pub thinking_display: AnthropicThinkingDisplayOverride,
57    #[serde(default)]
58    pub effort: AnthropicOptionalStringOverride,
59    #[serde(default)]
60    pub task_budget_tokens: AnthropicOptionalU32Override,
61}
62
63/// Universal LLM request structure
64#[derive(Debug, Clone, Serialize, Deserialize, Default)]
65
66pub struct LLMRequest {
67    pub messages: Vec<Message>,
68    pub system_prompt: Option<Arc<String>>,
69    pub tools: Option<Arc<Vec<ToolDefinition>>>,
70    pub model: String,
71    pub max_tokens: Option<u32>,
72    pub temperature: Option<f32>,
73    pub stream: bool,
74
75    /// Optional structured output JSON schema to request from providers that support it
76    /// For Anthropic this will be sent as `output_config.format = { type: "json_schema", schema: ... }`
77    pub output_format: Option<Value>,
78
79    /// Tool choice configuration based on official API docs
80    /// Supports: "auto" (default), "none", "any", or specific tool selection
81    pub tool_choice: Option<ToolChoice>,
82
83    /// Whether to enable parallel tool calls (OpenAI specific)
84    pub parallel_tool_calls: Option<bool>,
85
86    /// Parallel tool use configuration following Anthropic best practices
87    pub parallel_tool_config: Option<Box<ParallelToolConfig>>,
88
89    /// Reasoning effort level for models that support it (none, minimal, low, medium, high, xhigh)
90    /// Applies to: Claude, GPT-5 family, Gemini, Qwen3, DeepSeek with reasoning capability
91    pub reasoning_effort: Option<ReasoningEffortLevel>,
92
93    /// Effort level for overall token usage (low, medium, high, xhigh, max)
94    /// Applies to: Anthropic adaptive-thinking models such as Claude Opus 4.7
95    /// Controls how many tokens Claude uses when responding, trading off between
96    /// response thoroughness and token efficiency.
97    pub effort: Option<String>,
98
99    /// Verbosity level for output text (low, medium, high)
100    /// Applies to: GPT-5.4-family Responses workflows and other models that support verbosity control
101    pub verbosity: Option<VerbosityLevel>,
102
103    /// Advanced generation parameters
104    pub do_sample: Option<bool>,
105    pub top_p: Option<f32>,
106    pub top_k: Option<i32>,
107    pub presence_penalty: Option<f32>,
108    pub frequency_penalty: Option<f32>,
109    pub stop_sequences: Option<Vec<String>>,
110    /// Optional budget for extended thinking (Anthropic specific)
111    /// Minimum value: 1024
112    pub thinking_budget: Option<u32>,
113
114    /// Optional beta headers for Anthropic (and potentially others)
115    pub betas: Option<Vec<String>>,
116
117    /// Optional provider-specific context management configuration (Anthropic compaction/editing).
118    pub context_management: Option<Value>,
119
120    /// Optional prefill text for the assistant response (Anthropic prefilling)
121    /// Incompatible with extended thinking
122    pub prefill: Option<String>,
123
124    /// Whether to enable character reinforcement (system prompt/prefill tagging)
125    pub character_reinforcement: bool,
126
127    /// Optional character name for reinforcement
128    pub character_name: Option<String>,
129
130    /// Optional coding agent specific settings
131    pub coding_agent_settings: Option<Box<CodingAgentSettings>>,
132
133    /// Optional turn metadata for git context (remote URLs, commit hash, etc.)
134    /// This is sent as X-Turn-Metadata header to providers that support it
135    pub metadata: Option<Value>,
136
137    /// Optional Responses API continuity pointer for server-side context chaining.
138    /// Used by providers that support stateful response continuation.
139    pub previous_response_id: Option<String>,
140
141    /// Optional Responses API storage flag.
142    /// When set, providers that support `store` pass this through directly.
143    pub response_store: Option<bool>,
144
145    /// Optional Responses API include fields (e.g. reasoning encrypted content).
146    /// Passed through only for providers/APIs that support include selectors.
147    pub responses_include: Option<Vec<String>>,
148
149    /// Optional native OpenAI `service_tier` request parameter.
150    /// Passed through only for native OpenAI endpoints that support service tiers.
151    pub service_tier: Option<String>,
152
153    /// Optional provider routing hint for prompt cache stickiness.
154    /// OpenAI uses this value to improve routing locality for repeated prefixes.
155    pub prompt_cache_key: Option<String>,
156
157    /// Optional request-scoped prompt cache profile for provider-specific TTL overrides.
158    pub prompt_cache_profile: Option<PromptCacheProfile>,
159
160    /// Optional Anthropic-specific request overrides used when request semantics must
161    /// not inherit VT Code's provider defaults, such as the Anthropic compatibility server.
162    pub anthropic_request_overrides: Option<AnthropicRequestOverrides>,
163}
164
165/// Optional overrides for standalone Responses compaction requests.
166#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
167pub struct ResponsesCompactionOptions {
168    /// Optional custom instructions appended to the derived replay instructions.
169    pub instructions: Option<String>,
170    /// Optional output token limit for the compaction response.
171    pub max_output_tokens: Option<u32>,
172    /// Optional reasoning effort override for the compaction pass.
173    pub reasoning_effort: Option<ReasoningEffortLevel>,
174    /// Optional verbosity override for the compaction output text settings.
175    pub verbosity: Option<VerbosityLevel>,
176    /// Optional include selectors override.
177    pub responses_include: Option<Vec<String>>,
178    /// Optional storage override.
179    pub response_store: Option<bool>,
180    /// Optional native OpenAI service tier override.
181    pub service_tier: Option<String>,
182    /// Optional prompt cache routing override.
183    pub prompt_cache_key: Option<String>,
184}
185
186/// Settings to refine model behavior for coding agent tasks
187#[derive(Debug, Clone, Serialize, Deserialize, Default)]
188pub struct CodingAgentSettings {
189    /// Encourage the model to use XML tags for structured responses
190    pub force_xml_tags: bool,
191    /// Automatically prefill with `<thought>` to encourage reasoning
192    pub prefill_thought: bool,
193    /// Explicitly allow the model to say "I don't know" or "I am unsure"
194    pub allow_uncertainty: bool,
195    /// Enforce strict grounding to provided documents
196    pub strict_grounding: bool,
197    /// Optimize for long context by hoisting large messages and grounding in quotes
198    pub long_context_optimization: bool,
199    /// Wrap multiple file contexts in structured XML tags
200    pub use_xml_document_format: bool,
201    /// Inject instructions to find quotes before carrying out the task
202    pub force_quote_grounding: bool,
203    /// Optional specialized role for Claude (e.g., "Senior Software Architect")
204    pub role_specialization: Option<String>,
205    /// Enforce the use of `<thinking>` and `<answer>` tags for manual chain-of-thought
206    pub enforce_structured_thought: bool,
207}
208
209/// Tool choice configuration that works across different providers
210/// Based on OpenAI, Anthropic, and Gemini API specifications
211/// Follows Anthropic's tool use best practices for optimal performance
212#[derive(Debug, Clone, Serialize, Deserialize)]
213#[serde(untagged)]
214#[derive(Default)]
215pub enum ToolChoice {
216    /// Let the model decide whether to call tools ("auto")
217    /// Default behavior - allows model to use tools when appropriate
218    #[default]
219    Auto,
220
221    /// Force the model to not call any tools ("none")
222    /// Useful for pure conversational responses without tool usage
223    None,
224
225    /// Force the model to call at least one tool ("any")
226    /// Ensures tool usage even when model might prefer direct response
227    Any,
228
229    /// Force the model to call a specific tool
230    /// Useful for directing model to use particular functionality
231    Specific(SpecificToolChoice),
232}
233
234/// Specific tool choice for forcing a particular function call
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub struct SpecificToolChoice {
237    #[serde(rename = "type")]
238    pub tool_type: String, // "function"
239
240    pub function: SpecificFunctionChoice,
241}
242
243/// Specific function choice details
244#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct SpecificFunctionChoice {
246    pub name: String,
247}
248
249impl ToolChoice {
250    /// Create auto tool choice (default behavior)
251    pub fn auto() -> Self {
252        Self::Auto
253    }
254
255    /// Create none tool choice (disable tool calling)
256    pub fn none() -> Self {
257        Self::None
258    }
259
260    /// Create any tool choice (force at least one tool call)
261    pub fn any() -> Self {
262        Self::Any
263    }
264
265    /// Create specific function tool choice
266    pub fn function(name: String) -> Self {
267        Self::Specific(SpecificToolChoice {
268            tool_type: "function".to_owned(),
269            function: SpecificFunctionChoice { name },
270        })
271    }
272
273    /// Check if this tool choice allows parallel tool use
274    /// Based on Anthropic's parallel tool use guidelines
275    pub fn allows_parallel_tools(&self) -> bool {
276        match self {
277            // Auto allows parallel tools by default
278            Self::Auto => true,
279            // Any forces at least one tool, may allow parallel
280            Self::Any => true,
281            // Specific forces one particular tool, typically no parallel
282            Self::Specific(_) => false,
283            // None disables tools entirely
284            Self::None => false,
285        }
286    }
287
288    /// Get human-readable description of tool choice behavior
289    pub fn description(&self) -> &'static str {
290        match self {
291            Self::Auto => "Model decides when to use tools (allows parallel)",
292            Self::None => "No tools will be used",
293            Self::Any => "At least one tool must be used (allows parallel)",
294            Self::Specific(_) => "Specific tool must be used (no parallel)",
295        }
296    }
297
298    /// OpenAI-compatible providers that share the same tool_choice format
299    const OPENAI_STYLE_PROVIDERS: &'static [&'static str] = &[
300        "openai",
301        "deepseek",
302        "huggingface",
303        "mistral",
304        "openrouter",
305        "zai",
306        "moonshot",
307        "stepfun",
308        "evolink",
309        "lmstudio",
310        "llamacpp",
311    ];
312
313    /// Convert to provider-specific format
314    #[inline]
315    pub fn to_provider_format(&self, provider: &str) -> Value {
316        if Self::OPENAI_STYLE_PROVIDERS.contains(&provider) {
317            return self.to_openai_format();
318        }
319
320        match provider {
321            "anthropic" => self.to_anthropic_format(),
322            "gemini" => self.to_gemini_format(),
323            _ => self.to_openai_format(), // Default to OpenAI format
324        }
325    }
326
327    #[inline]
328    fn to_openai_format(&self) -> Value {
329        match self {
330            Self::Auto => json!("auto"),
331            Self::None => json!("none"),
332            Self::Any => json!("required"),
333            Self::Specific(choice) => json!(choice),
334        }
335    }
336
337    #[inline]
338    fn to_anthropic_format(&self) -> Value {
339        match self {
340            Self::Auto => json!({"type": "auto"}),
341            Self::None => json!({"type": "none"}),
342            Self::Any => json!({"type": "any"}),
343            Self::Specific(choice) => json!({"type": "tool", "name": &choice.function.name}),
344        }
345    }
346
347    #[inline]
348    fn to_gemini_format(&self) -> Value {
349        match self {
350            Self::Auto => json!({"mode": "auto"}),
351            Self::None => json!({"mode": "none"}),
352            Self::Any => json!({"mode": "any"}),
353            Self::Specific(choice) => {
354                json!({"mode": "any", "allowed_function_names": [&choice.function.name]})
355            }
356        }
357    }
358}
359
360/// Configuration for parallel tool use behavior
361/// Based on Anthropic's parallel tool use guidelines
362#[derive(Debug, Clone, Serialize, Deserialize)]
363pub struct ParallelToolConfig {
364    /// Whether to disable parallel tool use
365    /// When true, forces sequential tool execution
366    pub disable_parallel_tool_use: bool,
367
368    /// Maximum number of tools to execute in parallel
369    /// None means no limit (provider default)
370    pub max_parallel_tools: Option<usize>,
371
372    /// Whether to encourage parallel tool use in prompts
373    pub encourage_parallel: bool,
374}
375
376impl Default for ParallelToolConfig {
377    fn default() -> Self {
378        Self {
379            disable_parallel_tool_use: false,
380            max_parallel_tools: Some(5), // Reasonable default
381            encourage_parallel: true,
382        }
383    }
384}
385
386impl ParallelToolConfig {
387    /// Create configuration optimized for Anthropic models
388    pub fn anthropic_optimized() -> Self {
389        Self {
390            disable_parallel_tool_use: false,
391            max_parallel_tools: None, // Let Anthropic decide
392            encourage_parallel: true,
393        }
394    }
395
396    /// Create configuration for sequential tool use
397    pub fn sequential_only() -> Self {
398        Self {
399            disable_parallel_tool_use: true,
400            max_parallel_tools: Some(1),
401            encourage_parallel: false,
402        }
403    }
404}
// vtcode_core/llm/provider/request.rs

// vtcode_core/llm/provider/request.rs