vtcode-core 0.116.4

Core library for VT Code - a Rust-based terminal coding agent
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
use crate::config::types::{ReasoningEffortLevel, VerbosityLevel};
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use std::sync::Arc;

use super::{Message, ToolDefinition};

/// Request-scoped prompt cache profile.
///
/// Variant names are serialized in `snake_case`. The enum currently has a
/// single variant and is cheap to copy (`Copy`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum PromptCacheProfile {
    /// Serialized as `"budget_continuation"`.
    // NOTE(review): the caching/TTL behaviour this profile selects is decided
    // by provider code outside this file — confirm there before relying on it.
    BudgetContinuation,
}

/// Per-request override for the thinking mode of an Anthropic request.
///
/// Used as the type of [`AnthropicRequestOverrides::thinking_mode`]. Variant
/// names are serialized in `snake_case`; the default is [`Self::Inherit`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicThinkingModeOverride {
    /// Default variant.
    // NOTE(review): presumably means "apply no override and keep the provider
    // default" — confirm against the Anthropic provider implementation.
    #[default]
    Inherit,
    /// Serialized as `"disabled"`.
    Disabled,
    /// Serialized as `"adaptive"`.
    Adaptive,
    /// Carries a `u32` payload.
    // NOTE(review): the payload is presumably a thinking token budget — confirm
    // units and valid range with the consumer of this value.
    ManualBudget(u32),
}

/// Per-request override for how Anthropic thinking output is displayed.
///
/// Used as the type of [`AnthropicRequestOverrides::thinking_display`].
/// Variant names are serialized in `snake_case`; the default is
/// [`Self::Inherit`]. All variants are data-free, so the type is `Copy`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicThinkingDisplayOverride {
    /// Default variant.
    // NOTE(review): presumably "no override" — confirm with the provider code.
    #[default]
    Inherit,
    /// Serialized as `"summarized"`.
    Summarized,
    /// Serialized as `"omitted"`.
    Omitted,
}

/// Three-state override for an optional string-valued request parameter.
///
/// Used as the type of [`AnthropicRequestOverrides::effort`]. Variant names
/// are serialized in `snake_case`; the default is [`Self::Inherit`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicOptionalStringOverride {
    /// Default variant.
    // NOTE(review): presumably "no override" — confirm with the provider code.
    #[default]
    Inherit,
    /// Serialized as `"omit"`.
    // NOTE(review): presumably drops the parameter from the outgoing request —
    // confirm with the provider code.
    Omit,
    /// Carries the explicit string value.
    Explicit(String),
}

/// Three-state override for an optional `u32`-valued request parameter.
///
/// Used as the type of [`AnthropicRequestOverrides::task_budget_tokens`].
/// Variant names are serialized in `snake_case`; the default is
/// [`Self::Inherit`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum AnthropicOptionalU32Override {
    /// Default variant.
    // NOTE(review): presumably "no override" — confirm with the provider code.
    #[default]
    Inherit,
    /// Serialized as `"omit"`.
    // NOTE(review): presumably drops the parameter from the outgoing request —
    // confirm with the provider code.
    Omit,
    /// Carries the explicit `u32` value.
    Explicit(u32),
}

/// Bundle of Anthropic-specific per-request overrides.
///
/// Every field is marked `#[serde(default)]`, so a field missing from the
/// input deserializes to its type's default (the `Inherit` variant in each
/// case). `Default::default()` likewise yields `Inherit` for all four fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct AnthropicRequestOverrides {
    /// Thinking-mode override; defaults to `Inherit` when absent.
    #[serde(default)]
    pub thinking_mode: AnthropicThinkingModeOverride,
    /// Thinking-display override; defaults to `Inherit` when absent.
    #[serde(default)]
    pub thinking_display: AnthropicThinkingDisplayOverride,
    /// Override for the string-valued `effort` parameter; defaults to
    /// `Inherit` when absent.
    #[serde(default)]
    pub effort: AnthropicOptionalStringOverride,
    /// Override for the `u32`-valued task budget; defaults to `Inherit` when
    /// absent.
    #[serde(default)]
    pub task_budget_tokens: AnthropicOptionalU32Override,
}

/// Universal LLM request structure
///
/// A provider-agnostic description of one request. Most fields are `Option`s
/// that individual providers may or may not honour; the struct derives
/// `Default`, so callers can set only the fields they need and fill the rest
/// with `..Default::default()` (all `Option`s `None`, all `bool`s `false`,
/// empty `messages` and `model`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]

pub struct LLMRequest {
    /// Ordered list of [`Message`] values carried by the request.
    pub messages: Vec<Message>,
    /// Optional system prompt. Held behind an `Arc`, so cloning the request
    /// shares the string instead of copying it.
    pub system_prompt: Option<Arc<String>>,
    /// Optional tool definitions. Held behind an `Arc`, so cloning the request
    /// shares the list instead of copying it.
    pub tools: Option<Arc<Vec<ToolDefinition>>>,
    /// Model identifier string.
    // NOTE(review): expected format is presumably provider-specific — confirm.
    pub model: String,
    /// Optional token limit.
    // NOTE(review): presumably caps generated output tokens — confirm how each
    // provider maps this field.
    pub max_tokens: Option<u32>,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Whether a streaming response is requested (`false` under `Default`).
    pub stream: bool,

    /// Optional structured output JSON schema to request from providers that support it
    /// For Anthropic this will be sent as `output_config.format = { type: "json_schema", schema: ... }`
    pub output_format: Option<Value>,

    /// Tool choice configuration based on official API docs
    /// Supports: "auto" (default), "none", "any", or specific tool selection
    pub tool_choice: Option<ToolChoice>,

    /// Whether to enable parallel tool calls (OpenAI specific)
    pub parallel_tool_calls: Option<bool>,

    /// Parallel tool use configuration following Anthropic best practices
    /// (boxed, so an unset value costs only a pointer inside the request).
    pub parallel_tool_config: Option<Box<ParallelToolConfig>>,

    /// Reasoning effort level for models that support it (none, minimal, low, medium, high, xhigh)
    /// Applies to: Claude, GPT-5 family, Gemini, Qwen3, DeepSeek with reasoning capability
    pub reasoning_effort: Option<ReasoningEffortLevel>,

    /// Effort level for overall token usage (low, medium, high, xhigh, max)
    /// Applies to: Anthropic adaptive-thinking models such as Claude Opus 4.7
    /// Controls how many tokens Claude uses when responding, trading off between
    /// response thoroughness and token efficiency.
    pub effort: Option<String>,

    /// Verbosity level for output text (low, medium, high)
    /// Applies to: GPT-5.4-family Responses workflows and other models that support verbosity control
    pub verbosity: Option<VerbosityLevel>,

    /// Advanced generation parameters
    /// (this field and the sampling/penalty/stop fields that follow).
    pub do_sample: Option<bool>,
    /// Optional `top_p` sampling parameter.
    pub top_p: Option<f32>,
    /// Optional `top_k` sampling parameter.
    pub top_k: Option<i32>,
    /// Optional presence penalty.
    pub presence_penalty: Option<f32>,
    /// Optional frequency penalty.
    pub frequency_penalty: Option<f32>,
    /// Optional list of stop sequences.
    pub stop_sequences: Option<Vec<String>>,
    /// Optional budget for extended thinking (Anthropic specific)
    /// Minimum value: 1024
    // NOTE(review): the minimum is not enforced by this type — presumably
    // validated by the provider layer; confirm.
    pub thinking_budget: Option<u32>,

    /// Optional beta headers for Anthropic (and potentially others)
    pub betas: Option<Vec<String>>,

    /// Optional provider-specific context management configuration (Anthropic compaction/editing).
    pub context_management: Option<Value>,

    /// Optional prefill text for the assistant response (Anthropic prefilling)
    /// Incompatible with extended thinking
    pub prefill: Option<String>,

    /// Whether to enable character reinforcement (system prompt/prefill tagging)
    pub character_reinforcement: bool,

    /// Optional character name for reinforcement
    pub character_name: Option<String>,

    /// Optional coding agent specific settings
    pub coding_agent_settings: Option<Box<CodingAgentSettings>>,

    /// Optional turn metadata for git context (remote URLs, commit hash, etc.)
    /// This is sent as X-Turn-Metadata header to providers that support it
    pub metadata: Option<Value>,

    /// Optional Responses API continuity pointer for server-side context chaining.
    /// Used by providers that support stateful response continuation.
    pub previous_response_id: Option<String>,

    /// Optional Responses API storage flag.
    /// When set, providers that support `store` pass this through directly.
    pub response_store: Option<bool>,

    /// Optional Responses API include fields (e.g. reasoning encrypted content).
    /// Passed through only for providers/APIs that support include selectors.
    pub responses_include: Option<Vec<String>>,

    /// Optional native OpenAI `service_tier` request parameter.
    /// Passed through only for native OpenAI endpoints that support service tiers.
    pub service_tier: Option<String>,

    /// Optional provider routing hint for prompt cache stickiness.
    /// OpenAI uses this value to improve routing locality for repeated prefixes.
    pub prompt_cache_key: Option<String>,

    /// Optional request-scoped prompt cache profile for provider-specific TTL overrides.
    pub prompt_cache_profile: Option<PromptCacheProfile>,

    /// Optional Anthropic-specific request overrides used when request semantics must
    /// not inherit VT Code's provider defaults, such as the Anthropic compatibility server.
    pub anthropic_request_overrides: Option<AnthropicRequestOverrides>,
}

/// Optional overrides for standalone Responses compaction requests.
///
/// Every field is an `Option`; `Default::default()` yields a value with all
/// fields `None`, i.e. no overrides.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct ResponsesCompactionOptions {
    /// Optional custom instructions appended to the derived replay instructions.
    pub instructions: Option<String>,
    /// Optional output token limit for the compaction response.
    pub max_output_tokens: Option<u32>,
    /// Optional reasoning effort override for the compaction pass.
    pub reasoning_effort: Option<ReasoningEffortLevel>,
    /// Optional verbosity override for the compaction output text settings.
    pub verbosity: Option<VerbosityLevel>,
    /// Optional include selectors override.
    pub responses_include: Option<Vec<String>>,
    /// Optional storage override.
    pub response_store: Option<bool>,
    /// Optional native OpenAI service tier override.
    pub service_tier: Option<String>,
    /// Optional prompt cache routing override.
    pub prompt_cache_key: Option<String>,
}

/// Settings to refine model behavior for coding agent tasks
///
/// All flags are plain `bool`s and default to `false` under
/// `Default::default()`; `role_specialization` defaults to `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CodingAgentSettings {
    /// Encourage the model to use XML tags for structured responses
    pub force_xml_tags: bool,
    /// Automatically prefill with `<thought>` to encourage reasoning
    pub prefill_thought: bool,
    /// Explicitly allow the model to say "I don't know" or "I am unsure"
    pub allow_uncertainty: bool,
    /// Enforce strict grounding to provided documents
    pub strict_grounding: bool,
    /// Optimize for long context by hoisting large messages and grounding in quotes
    pub long_context_optimization: bool,
    /// Wrap multiple file contexts in structured XML tags
    pub use_xml_document_format: bool,
    /// Inject instructions to find quotes before carrying out the task
    pub force_quote_grounding: bool,
    /// Optional specialized role for Claude (e.g., "Senior Software Architect")
    pub role_specialization: Option<String>,
    /// Enforce the use of `<thinking>` and `<answer>` tags for manual chain-of-thought
    pub enforce_structured_thought: bool,
}

/// Tool choice configuration that works across different providers
/// Based on OpenAI, Anthropic, and Gemini API specifications
/// Follows Anthropic's tool use best practices for optimal performance
///
/// The default is [`ToolChoice::Auto`]. Provider wire formats are produced by
/// `to_provider_format`, not by this type's derived `Serialize` impl.
// NOTE(review): with `#[serde(untagged)]`, serde writes data-free variants as
// a unit value (JSON `null`), so `Auto`, `None` and `Any` would presumably be
// indistinguishable after a serialize/deserialize round-trip (all reading
// back as `Auto`). Confirm whether this enum is ever (de)serialized directly,
// e.g. as part of `LLMRequest`, and whether that loss matters.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
#[derive(Default)]
pub enum ToolChoice {
    /// Let the model decide whether to call tools ("auto")
    /// Default behavior - allows model to use tools when appropriate
    #[default]
    Auto,

    /// Force the model to not call any tools ("none")
    /// Useful for pure conversational responses without tool usage
    None,

    /// Force the model to call at least one tool ("any")
    /// Ensures tool usage even when model might prefer direct response
    Any,

    /// Force the model to call a specific tool
    /// Useful for directing model to use particular functionality
    ///
    /// The payload names the tool; see [`SpecificToolChoice`].
    Specific(SpecificToolChoice),
}

/// Specific tool choice for forcing a particular function call
///
/// Serializes as `{"type": <tool_type>, "function": {"name": <name>}}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificToolChoice {
    /// Kind of tool being selected; serialized under the key `type`.
    /// `ToolChoice::function` sets this to `"function"`.
    #[serde(rename = "type")]
    pub tool_type: String, // "function"

    /// The function to call, identified by name.
    pub function: SpecificFunctionChoice,
}

/// Specific function choice details
///
/// Nested inside [`SpecificToolChoice`] as its `function` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpecificFunctionChoice {
    /// Name of the function (tool) the model must call.
    pub name: String,
}

impl ToolChoice {
    /// Create auto tool choice (default behavior)
    pub fn auto() -> Self {
        Self::Auto
    }

    /// Create none tool choice (disable tool calling)
    pub fn none() -> Self {
        Self::None
    }

    /// Create any tool choice (force at least one tool call)
    pub fn any() -> Self {
        Self::Any
    }

    /// Create specific function tool choice
    pub fn function(name: String) -> Self {
        Self::Specific(SpecificToolChoice {
            tool_type: "function".to_owned(),
            function: SpecificFunctionChoice { name },
        })
    }

    /// Check if this tool choice allows parallel tool use
    /// Based on Anthropic's parallel tool use guidelines
    pub fn allows_parallel_tools(&self) -> bool {
        match self {
            // Auto allows parallel tools by default
            Self::Auto => true,
            // Any forces at least one tool, may allow parallel
            Self::Any => true,
            // Specific forces one particular tool, typically no parallel
            Self::Specific(_) => false,
            // None disables tools entirely
            Self::None => false,
        }
    }

    /// Get human-readable description of tool choice behavior
    pub fn description(&self) -> &'static str {
        match self {
            Self::Auto => "Model decides when to use tools (allows parallel)",
            Self::None => "No tools will be used",
            Self::Any => "At least one tool must be used (allows parallel)",
            Self::Specific(_) => "Specific tool must be used (no parallel)",
        }
    }

    /// OpenAI-compatible providers that share the same tool_choice format
    const OPENAI_STYLE_PROVIDERS: &'static [&'static str] = &[
        "openai",
        "deepseek",
        "huggingface",
        "mistral",
        "openrouter",
        "zai",
        "moonshot",
        "stepfun",
        "lmstudio",
        "llamacpp",
    ];

    /// Convert to provider-specific format
    #[inline]
    pub fn to_provider_format(&self, provider: &str) -> Value {
        if Self::OPENAI_STYLE_PROVIDERS.contains(&provider) {
            return self.to_openai_format();
        }

        match provider {
            "anthropic" => self.to_anthropic_format(),
            "gemini" => self.to_gemini_format(),
            _ => self.to_openai_format(), // Default to OpenAI format
        }
    }

    #[inline]
    fn to_openai_format(&self) -> Value {
        match self {
            Self::Auto => json!("auto"),
            Self::None => json!("none"),
            Self::Any => json!("required"),
            Self::Specific(choice) => json!(choice),
        }
    }

    #[inline]
    fn to_anthropic_format(&self) -> Value {
        match self {
            Self::Auto => json!({"type": "auto"}),
            Self::None => json!({"type": "none"}),
            Self::Any => json!({"type": "any"}),
            Self::Specific(choice) => json!({"type": "tool", "name": &choice.function.name}),
        }
    }

    #[inline]
    fn to_gemini_format(&self) -> Value {
        match self {
            Self::Auto => json!({"mode": "auto"}),
            Self::None => json!({"mode": "none"}),
            Self::Any => json!({"mode": "any"}),
            Self::Specific(choice) => {
                json!({"mode": "any", "allowed_function_names": [&choice.function.name]})
            }
        }
    }
}

/// Configuration for parallel tool use behavior
/// Based on Anthropic's parallel tool use guidelines
///
/// `Default` is implemented by hand rather than derived: it enables parallel
/// use with a cap of five tools.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelToolConfig {
    /// Whether to disable parallel tool use
    /// When true, forces sequential tool execution
    pub disable_parallel_tool_use: bool,

    /// Maximum number of tools to execute in parallel
    /// None means no limit (provider default)
    pub max_parallel_tools: Option<usize>,

    /// Whether to encourage parallel tool use in prompts
    pub encourage_parallel: bool,
}

impl Default for ParallelToolConfig {
    fn default() -> Self {
        Self {
            disable_parallel_tool_use: false,
            max_parallel_tools: Some(5), // Reasonable default
            encourage_parallel: true,
        }
    }
}

impl ParallelToolConfig {
    /// Create configuration optimized for Anthropic models
    pub fn anthropic_optimized() -> Self {
        Self {
            disable_parallel_tool_use: false,
            max_parallel_tools: None, // Let Anthropic decide
            encourage_parallel: true,
        }
    }

    /// Create configuration for sequential tool use
    pub fn sequential_only() -> Self {
        Self {
            disable_parallel_tool_use: true,
            max_parallel_tools: Some(1),
            encourage_parallel: false,
        }
    }
}