1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
use crate::config::types::{ReasoningEffortLevel, VerbosityLevel};
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use std::sync::Arc;
use super::{Message, ToolDefinition};
/// Request-scoped prompt-cache profile used to select provider-specific
/// cache TTL overrides (see [`LLMRequest::prompt_cache_profile`]).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PromptCacheProfile {
    /// Serialized as `"budget_continuation"`. Presumably selects a longer
    /// cache TTL for budget-driven continuation turns — TODO confirm against
    /// the provider layer that consumes this profile.
    BudgetContinuation,
}
/// Per-request override for Anthropic extended-thinking mode.
///
/// `Inherit` (the default) keeps VT Code's provider defaults; the remaining
/// variants force an explicit mode for this request only.
///
/// All variants carry only `Copy` data, so the enum derives `Copy` for
/// consistency with [`AnthropicThinkingDisplayOverride`] and cheap by-value
/// use.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicThinkingModeOverride {
    /// No override; inherit the provider default.
    #[default]
    Inherit,
    /// Force extended thinking off for this request.
    Disabled,
    /// Let the provider manage the thinking budget adaptively.
    Adaptive,
    /// Force a fixed thinking budget, in tokens.
    ManualBudget(u32),
}
/// Per-request override for how Anthropic thinking output is surfaced.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicThinkingDisplayOverride {
    /// No override; inherit the provider default.
    #[default]
    Inherit,
    /// Presumably requests summarized thinking output — TODO confirm in the
    /// provider layer that interprets this override.
    Summarized,
    /// Presumably omits thinking output entirely — TODO confirm in the
    /// provider layer that interprets this override.
    Omitted,
}
/// Tri-state override for an optional string request field:
/// inherit the default, force the field to be omitted, or send an explicit
/// value. (Holds a `String`, so unlike the sibling overrides it cannot be
/// `Copy`.)
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicOptionalStringOverride {
    /// No override; inherit the provider default.
    #[default]
    Inherit,
    /// Force the field to be omitted from the outgoing request.
    Omit,
    /// Send exactly this value.
    Explicit(String),
}
/// Tri-state override for an optional `u32` request field:
/// inherit the default, force the field to be omitted, or send an explicit
/// value.
///
/// All variants carry only `Copy` data, so the enum derives `Copy` for
/// consistency with [`AnthropicThinkingDisplayOverride`] and cheap by-value
/// use.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicOptionalU32Override {
    /// No override; inherit the provider default.
    #[default]
    Inherit,
    /// Force the field to be omitted from the outgoing request.
    Omit,
    /// Send exactly this value.
    Explicit(u32),
}
/// Anthropic-specific request overrides applied when request semantics must
/// not inherit VT Code's provider defaults (see
/// [`LLMRequest::anthropic_request_overrides`]). Every field defaults to its
/// `Inherit` variant, so an all-default value is a no-op.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct AnthropicRequestOverrides {
    /// Override for extended-thinking mode.
    #[serde(default)]
    pub thinking_mode: AnthropicThinkingModeOverride,
    /// Override for how thinking output is displayed.
    #[serde(default)]
    pub thinking_display: AnthropicThinkingDisplayOverride,
    /// Override for the request's `effort` string field.
    #[serde(default)]
    pub effort: AnthropicOptionalStringOverride,
    /// Override for the request's task budget (tokens).
    #[serde(default)]
    pub task_budget_tokens: AnthropicOptionalU32Override,
}
/// Universal LLM request structure
///
/// Provider-agnostic request that each provider adapter translates into its
/// own wire format. Most fields are `Option` and are passed through only to
/// providers that support them.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LLMRequest {
    /// Conversation history to send with the request.
    pub messages: Vec<Message>,
    /// Optional system prompt, shared via `Arc` to avoid cloning the text.
    pub system_prompt: Option<Arc<String>>,
    /// Optional tool definitions, shared via `Arc` to avoid cloning the list.
    pub tools: Option<Arc<Vec<ToolDefinition>>>,
    /// Target model identifier (provider-specific name).
    pub model: String,
    /// Optional cap on generated tokens.
    pub max_tokens: Option<u32>,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Whether to request a streaming response.
    pub stream: bool,
    /// Optional structured output JSON schema to request from providers that support it
    /// For Anthropic this will be sent as `output_config.format = { type: "json_schema", schema: ... }`
    pub output_format: Option<Value>,
    /// Tool choice configuration based on official API docs
    /// Supports: "auto" (default), "none", "any", or specific tool selection
    pub tool_choice: Option<ToolChoice>,
    /// Whether to enable parallel tool calls (OpenAI specific)
    pub parallel_tool_calls: Option<bool>,
    /// Parallel tool use configuration following Anthropic best practices
    pub parallel_tool_config: Option<Box<ParallelToolConfig>>,
    /// Reasoning effort level for models that support it (none, minimal, low, medium, high, xhigh)
    /// Applies to: Claude, GPT-5 family, Gemini, Qwen3, DeepSeek with reasoning capability
    pub reasoning_effort: Option<ReasoningEffortLevel>,
    /// Effort level for overall token usage (low, medium, high, xhigh, max)
    /// Applies to: Anthropic adaptive-thinking models such as Claude Opus 4.7
    /// Controls how many tokens Claude uses when responding, trading off between
    /// response thoroughness and token efficiency.
    pub effort: Option<String>,
    /// Verbosity level for output text (low, medium, high)
    /// Applies to: GPT-5.4-family Responses workflows and other models that support verbosity control
    pub verbosity: Option<VerbosityLevel>,
    /// Advanced generation parameters
    /// Whether to enable sampling (provider-specific interpretation).
    pub do_sample: Option<bool>,
    /// Nucleus-sampling probability mass.
    pub top_p: Option<f32>,
    /// Top-k sampling cutoff.
    pub top_k: Option<i32>,
    /// Presence penalty (OpenAI-style).
    pub presence_penalty: Option<f32>,
    /// Frequency penalty (OpenAI-style).
    pub frequency_penalty: Option<f32>,
    /// Sequences that stop generation when emitted.
    pub stop_sequences: Option<Vec<String>>,
    /// Optional budget for extended thinking (Anthropic specific)
    /// Minimum value: 1024
    pub thinking_budget: Option<u32>,
    /// Optional beta headers for Anthropic (and potentially others)
    pub betas: Option<Vec<String>>,
    /// Optional provider-specific context management configuration (Anthropic compaction/editing).
    pub context_management: Option<Value>,
    /// Optional prefill text for the assistant response (Anthropic prefilling)
    /// Incompatible with extended thinking
    pub prefill: Option<String>,
    /// Whether to enable character reinforcement (system prompt/prefill tagging)
    pub character_reinforcement: bool,
    /// Optional character name for reinforcement
    pub character_name: Option<String>,
    /// Optional coding agent specific settings
    pub coding_agent_settings: Option<Box<CodingAgentSettings>>,
    /// Optional turn metadata for git context (remote URLs, commit hash, etc.)
    /// This is sent as X-Turn-Metadata header to providers that support it
    pub metadata: Option<Value>,
    /// Optional Responses API continuity pointer for server-side context chaining.
    /// Used by providers that support stateful response continuation.
    pub previous_response_id: Option<String>,
    /// Optional Responses API storage flag.
    /// When set, providers that support `store` pass this through directly.
    pub response_store: Option<bool>,
    /// Optional Responses API include fields (e.g. reasoning encrypted content).
    /// Passed through only for providers/APIs that support include selectors.
    pub responses_include: Option<Vec<String>>,
    /// Optional native OpenAI `service_tier` request parameter.
    /// Passed through only for native OpenAI endpoints that support service tiers.
    pub service_tier: Option<String>,
    /// Optional provider routing hint for prompt cache stickiness.
    /// OpenAI uses this value to improve routing locality for repeated prefixes.
    pub prompt_cache_key: Option<String>,
    /// Optional request-scoped prompt cache profile for provider-specific TTL overrides.
    pub prompt_cache_profile: Option<PromptCacheProfile>,
    /// Optional Anthropic-specific request overrides used when request semantics must
    /// not inherit VT Code's provider defaults, such as the Anthropic compatibility server.
    pub anthropic_request_overrides: Option<AnthropicRequestOverrides>,
}
/// Optional overrides for standalone Responses compaction requests.
///
/// Every field is `Option`; `None` means no override is applied for that
/// setting.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ResponsesCompactionOptions {
    /// Optional custom instructions appended to the derived replay instructions.
    pub instructions: Option<String>,
    /// Optional output token limit for the compaction response.
    pub max_output_tokens: Option<u32>,
    /// Optional reasoning effort override for the compaction pass.
    pub reasoning_effort: Option<ReasoningEffortLevel>,
    /// Optional verbosity override for the compaction output text settings.
    pub verbosity: Option<VerbosityLevel>,
    /// Optional include selectors override.
    pub responses_include: Option<Vec<String>>,
    /// Optional storage override.
    pub response_store: Option<bool>,
    /// Optional native OpenAI service tier override.
    pub service_tier: Option<String>,
    /// Optional prompt cache routing override.
    pub prompt_cache_key: Option<String>,
}
/// Settings to refine model behavior for coding agent tasks
///
/// All boolean flags default to `false` via the `Default` derive, so an
/// all-default value enables none of these behaviors.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CodingAgentSettings {
    /// Encourage the model to use XML tags for structured responses
    pub force_xml_tags: bool,
    /// Automatically prefill with `<thought>` to encourage reasoning
    pub prefill_thought: bool,
    /// Explicitly allow the model to say "I don't know" or "I am unsure"
    pub allow_uncertainty: bool,
    /// Enforce strict grounding to provided documents
    pub strict_grounding: bool,
    /// Optimize for long context by hoisting large messages and grounding in quotes
    pub long_context_optimization: bool,
    /// Wrap multiple file contexts in structured XML tags
    pub use_xml_document_format: bool,
    /// Inject instructions to find quotes before carrying out the task
    pub force_quote_grounding: bool,
    /// Optional specialized role for Claude (e.g., "Senior Software Architect")
    pub role_specialization: Option<String>,
    /// Enforce the use of `<thinking>` and `<answer>` tags for manual chain-of-thought
    pub enforce_structured_thought: bool,
}
/// Tool choice configuration that works across different providers
/// Based on OpenAI, Anthropic, and Gemini API specifications
/// Follows Anthropic's tool use best practices for optimal performance
///
/// NOTE(review): with `#[serde(untagged)]`, the unit variants (`Auto`,
/// `None`, `Any`) all serialize to JSON `null`, and `null` deserializes to
/// the first unit variant (`Auto`). Serde round-trips of `None`/`Any` are
/// therefore lossy — confirm whether this type is ever persisted directly.
/// Provider wire formats are produced by `to_provider_format` instead.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
#[derive(Default)]
pub enum ToolChoice {
    /// Let the model decide whether to call tools ("auto")
    /// Default behavior - allows model to use tools when appropriate
    #[default]
    Auto,
    /// Force the model to not call any tools ("none")
    /// Useful for pure conversational responses without tool usage
    None,
    /// Force the model to call at least one tool ("any")
    /// Ensures tool usage even when model might prefer direct response
    Any,
    /// Force the model to call a specific tool
    /// Useful for directing model to use particular functionality
    Specific(SpecificToolChoice),
}
/// Specific tool choice for forcing a particular function call
///
/// Serializes in the OpenAI shape: `{"type": "function", "function": {"name": …}}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificToolChoice {
    /// Discriminator; always "function" when built via `ToolChoice::function`.
    #[serde(rename = "type")]
    pub tool_type: String, // "function"
    /// The function the model is forced to call.
    pub function: SpecificFunctionChoice,
}
/// Specific function choice details
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificFunctionChoice {
    /// Name of the function/tool to force.
    pub name: String,
}
impl ToolChoice {
/// Create auto tool choice (default behavior)
pub fn auto() -> Self {
Self::Auto
}
/// Create none tool choice (disable tool calling)
pub fn none() -> Self {
Self::None
}
/// Create any tool choice (force at least one tool call)
pub fn any() -> Self {
Self::Any
}
/// Create specific function tool choice
pub fn function(name: String) -> Self {
Self::Specific(SpecificToolChoice {
tool_type: "function".to_owned(),
function: SpecificFunctionChoice { name },
})
}
/// Check if this tool choice allows parallel tool use
/// Based on Anthropic's parallel tool use guidelines
pub fn allows_parallel_tools(&self) -> bool {
match self {
// Auto allows parallel tools by default
Self::Auto => true,
// Any forces at least one tool, may allow parallel
Self::Any => true,
// Specific forces one particular tool, typically no parallel
Self::Specific(_) => false,
// None disables tools entirely
Self::None => false,
}
}
/// Get human-readable description of tool choice behavior
pub fn description(&self) -> &'static str {
match self {
Self::Auto => "Model decides when to use tools (allows parallel)",
Self::None => "No tools will be used",
Self::Any => "At least one tool must be used (allows parallel)",
Self::Specific(_) => "Specific tool must be used (no parallel)",
}
}
/// OpenAI-compatible providers that share the same tool_choice format
const OPENAI_STYLE_PROVIDERS: &'static [&'static str] = &[
"openai",
"deepseek",
"huggingface",
"openrouter",
"zai",
"moonshot",
"lmstudio",
];
/// Convert to provider-specific format
#[inline]
pub fn to_provider_format(&self, provider: &str) -> Value {
if Self::OPENAI_STYLE_PROVIDERS.contains(&provider) {
return self.to_openai_format();
}
match provider {
"anthropic" => self.to_anthropic_format(),
"gemini" => self.to_gemini_format(),
_ => self.to_openai_format(), // Default to OpenAI format
}
}
#[inline]
fn to_openai_format(&self) -> Value {
match self {
Self::Auto => json!("auto"),
Self::None => json!("none"),
Self::Any => json!("required"),
Self::Specific(choice) => json!(choice),
}
}
#[inline]
fn to_anthropic_format(&self) -> Value {
match self {
Self::Auto => json!({"type": "auto"}),
Self::None => json!({"type": "none"}),
Self::Any => json!({"type": "any"}),
Self::Specific(choice) => json!({"type": "tool", "name": &choice.function.name}),
}
}
#[inline]
fn to_gemini_format(&self) -> Value {
match self {
Self::Auto => json!({"mode": "auto"}),
Self::None => json!({"mode": "none"}),
Self::Any => json!({"mode": "any"}),
Self::Specific(choice) => {
json!({"mode": "any", "allowed_function_names": [&choice.function.name]})
}
}
}
}
/// Configuration for parallel tool use behavior
/// Based on Anthropic's parallel tool use guidelines
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelToolConfig {
    /// Whether to disable parallel tool use
    /// When true, forces sequential tool execution
    pub disable_parallel_tool_use: bool,
    /// Maximum number of tools to execute in parallel
    /// None means no limit (provider default)
    pub max_parallel_tools: Option<usize>,
    /// Whether to encourage parallel tool use in prompts
    pub encourage_parallel: bool,
}
impl Default for ParallelToolConfig {
fn default() -> Self {
Self {
disable_parallel_tool_use: false,
max_parallel_tools: Some(5), // Reasonable default
encourage_parallel: true,
}
}
}
impl ParallelToolConfig {
/// Create configuration optimized for Anthropic models
pub fn anthropic_optimized() -> Self {
Self {
disable_parallel_tool_use: false,
max_parallel_tools: None, // Let Anthropic decide
encourage_parallel: true,
}
}
/// Create configuration for sequential tool use
pub fn sequential_only() -> Self {
Self {
disable_parallel_tool_use: true,
max_parallel_tools: Some(1),
encourage_parallel: false,
}
}
}