vtcode_config/core/
prompt_cache.rs

1use crate::constants::prompt_cache;
2use anyhow::Context;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7
8/// Global prompt caching configuration loaded from vtcode.toml
9#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
10#[derive(Debug, Clone, Deserialize, Serialize)]
11pub struct PromptCachingConfig {
12    /// Enable prompt caching features globally
13    #[serde(default = "default_enabled")]
14    pub enabled: bool,
15
16    /// Base directory for local prompt cache storage (supports `~` expansion)
17    #[serde(default = "default_cache_dir")]
18    pub cache_dir: String,
19
20    /// Maximum number of cached prompt entries to retain on disk
21    #[serde(default = "default_max_entries")]
22    pub max_entries: usize,
23
24    /// Maximum age (in days) before cached entries are purged
25    #[serde(default = "default_max_age_days")]
26    pub max_age_days: u64,
27
28    /// Automatically evict stale entries on startup/shutdown
29    #[serde(default = "default_auto_cleanup")]
30    pub enable_auto_cleanup: bool,
31
32    /// Minimum quality score required before persisting an entry
33    #[serde(default = "default_min_quality_threshold")]
34    pub min_quality_threshold: f64,
35
36    /// Provider specific overrides
37    #[serde(default)]
38    pub providers: ProviderPromptCachingConfig,
39}
40
41impl Default for PromptCachingConfig {
42    fn default() -> Self {
43        Self {
44            enabled: default_enabled(),
45            cache_dir: default_cache_dir(),
46            max_entries: default_max_entries(),
47            max_age_days: default_max_age_days(),
48            enable_auto_cleanup: default_auto_cleanup(),
49            min_quality_threshold: default_min_quality_threshold(),
50            providers: ProviderPromptCachingConfig::default(),
51        }
52    }
53}
54
55impl PromptCachingConfig {
56    /// Resolve the configured cache directory to an absolute path
57    ///
58    /// - `~` is expanded to the user's home directory when available
59    /// - Relative paths are resolved against the provided workspace root when supplied
60    /// - Falls back to the configured string when neither applies
61    pub fn resolve_cache_dir(&self, workspace_root: Option<&Path>) -> PathBuf {
62        resolve_path(&self.cache_dir, workspace_root)
63    }
64}
65
66/// Per-provider configuration overrides
67#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
68#[derive(Debug, Clone, Deserialize, Serialize, Default)]
69pub struct ProviderPromptCachingConfig {
70    #[serde(default = "OpenAIPromptCacheSettings::default")]
71    pub openai: OpenAIPromptCacheSettings,
72
73    #[serde(default = "AnthropicPromptCacheSettings::default")]
74    pub anthropic: AnthropicPromptCacheSettings,
75
76    #[serde(default = "GeminiPromptCacheSettings::default")]
77    pub gemini: GeminiPromptCacheSettings,
78
79    #[serde(default = "OpenRouterPromptCacheSettings::default")]
80    pub openrouter: OpenRouterPromptCacheSettings,
81
82    #[serde(default = "MoonshotPromptCacheSettings::default")]
83    pub moonshot: MoonshotPromptCacheSettings,
84
85    #[serde(default = "XAIPromptCacheSettings::default")]
86    pub xai: XAIPromptCacheSettings,
87
88    #[serde(default = "DeepSeekPromptCacheSettings::default")]
89    pub deepseek: DeepSeekPromptCacheSettings,
90
91    #[serde(default = "ZaiPromptCacheSettings::default")]
92    pub zai: ZaiPromptCacheSettings,
93}
94
95/// OpenAI prompt caching controls (automatic with metrics)
96#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
97#[derive(Debug, Clone, Deserialize, Serialize)]
98pub struct OpenAIPromptCacheSettings {
99    #[serde(default = "default_true")]
100    pub enabled: bool,
101
102    #[serde(default = "default_openai_min_prefix_tokens")]
103    pub min_prefix_tokens: u32,
104
105    #[serde(default = "default_openai_idle_expiration")]
106    pub idle_expiration_seconds: u64,
107
108    #[serde(default = "default_true")]
109    pub surface_metrics: bool,
110
111    /// Optional prompt cache retention string to pass directly into OpenAI Responses API
112    /// Example: "24h" or "1d". If set, VT Code will include `prompt_cache_retention`
113    /// in the request body to extend the model-side prompt caching window.
114    #[serde(default)]
115    pub prompt_cache_retention: Option<String>,
116}
117
118impl Default for OpenAIPromptCacheSettings {
119    fn default() -> Self {
120        Self {
121            enabled: default_true(),
122            min_prefix_tokens: default_openai_min_prefix_tokens(),
123            idle_expiration_seconds: default_openai_idle_expiration(),
124            surface_metrics: default_true(),
125            prompt_cache_retention: None,
126        }
127    }
128}
129
130impl OpenAIPromptCacheSettings {
131    /// Validate OpenAI provider prompt cache settings. Returns Err if the retention value is invalid.
132    pub fn validate(&self) -> anyhow::Result<()> {
133        if let Some(ref retention) = self.prompt_cache_retention {
134            parse_retention_duration(retention)
135                .with_context(|| format!("Invalid prompt_cache_retention: {}", retention))?;
136        }
137        Ok(())
138    }
139}
140
141/// Anthropic Claude cache control settings
142#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
143#[derive(Debug, Clone, Deserialize, Serialize)]
144pub struct AnthropicPromptCacheSettings {
145    #[serde(default = "default_true")]
146    pub enabled: bool,
147
148    /// Default TTL in seconds for the first cache breakpoint (tools/system).
149    /// Anthropic only supports "5m" (300s) or "1h" (3600s) TTL formats.
150    /// Set to >= 3600 for 1-hour cache on tools and system prompts.
151    /// Default: 3600 (1 hour) - recommended for stable tool definitions
152    #[serde(default = "default_anthropic_tools_ttl")]
153    pub tools_ttl_seconds: u64,
154
155    /// TTL for subsequent cache breakpoints (messages).
156    /// Set to >= 3600 for 1-hour cache on messages.
157    /// Default: 300 (5 minutes) - recommended for frequently changing messages
158    #[serde(default = "default_anthropic_messages_ttl")]
159    pub messages_ttl_seconds: u64,
160
161    /// Maximum number of cache breakpoints to use (max 4 per Anthropic spec).
162    /// Default: 4
163    #[serde(default = "default_anthropic_max_breakpoints")]
164    pub max_breakpoints: u8,
165
166    /// Apply cache control to system prompts by default
167    #[serde(default = "default_true")]
168    pub cache_system_messages: bool,
169
170    /// Apply cache control to user messages exceeding threshold
171    #[serde(default = "default_true")]
172    pub cache_user_messages: bool,
173
174    /// Apply cache control to tool definitions by default
175    /// Default: true (tools are typically stable and benefit from longer caching)
176    #[serde(default = "default_true")]
177    pub cache_tool_definitions: bool,
178
179    /// Minimum message length (in characters) before applying cache control
180    /// to avoid caching very short messages that don't benefit from caching.
181    /// Default: 256 characters (~64 tokens)
182    #[serde(default = "default_min_message_length")]
183    pub min_message_length_for_cache: usize,
184}
185
186impl Default for AnthropicPromptCacheSettings {
187    fn default() -> Self {
188        Self {
189            enabled: default_true(),
190            tools_ttl_seconds: default_anthropic_tools_ttl(),
191            messages_ttl_seconds: default_anthropic_messages_ttl(),
192            max_breakpoints: default_anthropic_max_breakpoints(),
193            cache_system_messages: default_true(),
194            cache_user_messages: default_true(),
195            cache_tool_definitions: default_true(),
196            min_message_length_for_cache: default_min_message_length(),
197        }
198    }
199}
200
201/// Gemini API caching preferences
202#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
203#[derive(Debug, Clone, Deserialize, Serialize)]
204pub struct GeminiPromptCacheSettings {
205    #[serde(default = "default_true")]
206    pub enabled: bool,
207
208    #[serde(default = "default_gemini_mode")]
209    pub mode: GeminiPromptCacheMode,
210
211    #[serde(default = "default_gemini_min_prefix_tokens")]
212    pub min_prefix_tokens: u32,
213
214    /// TTL for explicit caches (ignored in implicit mode)
215    #[serde(default = "default_gemini_explicit_ttl")]
216    pub explicit_ttl_seconds: Option<u64>,
217}
218
219impl Default for GeminiPromptCacheSettings {
220    fn default() -> Self {
221        Self {
222            enabled: default_true(),
223            mode: GeminiPromptCacheMode::default(),
224            min_prefix_tokens: default_gemini_min_prefix_tokens(),
225            explicit_ttl_seconds: default_gemini_explicit_ttl(),
226        }
227    }
228}
229
230/// Gemini prompt caching mode selection
231#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
232#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
233#[serde(rename_all = "snake_case")]
234#[derive(Default)]
235pub enum GeminiPromptCacheMode {
236    #[default]
237    Implicit,
238    Explicit,
239    Off,
240}
241
242/// OpenRouter passthrough caching controls
243#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
244#[derive(Debug, Clone, Deserialize, Serialize)]
245pub struct OpenRouterPromptCacheSettings {
246    #[serde(default = "default_true")]
247    pub enabled: bool,
248
249    /// Propagate provider cache instructions automatically
250    #[serde(default = "default_true")]
251    pub propagate_provider_capabilities: bool,
252
253    /// Surface cache savings reported by OpenRouter
254    #[serde(default = "default_true")]
255    pub report_savings: bool,
256}
257
258impl Default for OpenRouterPromptCacheSettings {
259    fn default() -> Self {
260        Self {
261            enabled: default_true(),
262            propagate_provider_capabilities: default_true(),
263            report_savings: default_true(),
264        }
265    }
266}
267
268/// Moonshot prompt caching configuration (leverages server-side reuse)
269#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
270#[derive(Debug, Clone, Deserialize, Serialize)]
271pub struct MoonshotPromptCacheSettings {
272    #[serde(default = "default_moonshot_enabled")]
273    pub enabled: bool,
274}
275
276impl Default for MoonshotPromptCacheSettings {
277    fn default() -> Self {
278        Self {
279            enabled: default_moonshot_enabled(),
280        }
281    }
282}
283
284/// xAI prompt caching configuration (automatic platform-level cache)
285#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
286#[derive(Debug, Clone, Deserialize, Serialize)]
287pub struct XAIPromptCacheSettings {
288    #[serde(default = "default_true")]
289    pub enabled: bool,
290}
291
292impl Default for XAIPromptCacheSettings {
293    fn default() -> Self {
294        Self {
295            enabled: default_true(),
296        }
297    }
298}
299
300/// DeepSeek prompt caching configuration (automatic KV cache reuse)
301#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
302#[derive(Debug, Clone, Deserialize, Serialize)]
303pub struct DeepSeekPromptCacheSettings {
304    #[serde(default = "default_true")]
305    pub enabled: bool,
306
307    /// Emit cache hit/miss metrics from responses when available
308    #[serde(default = "default_true")]
309    pub surface_metrics: bool,
310}
311
312impl Default for DeepSeekPromptCacheSettings {
313    fn default() -> Self {
314        Self {
315            enabled: default_true(),
316            surface_metrics: default_true(),
317        }
318    }
319}
320
321/// Z.AI prompt caching configuration (disabled until platform exposes metrics)
322#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
323#[derive(Debug, Clone, Deserialize, Serialize)]
324pub struct ZaiPromptCacheSettings {
325    #[serde(default = "default_zai_enabled")]
326    pub enabled: bool,
327}
328
329impl Default for ZaiPromptCacheSettings {
330    fn default() -> Self {
331        Self {
332            enabled: default_zai_enabled(),
333        }
334    }
335}
336
337fn default_enabled() -> bool {
338    prompt_cache::DEFAULT_ENABLED
339}
340
341fn default_cache_dir() -> String {
342    format!("~/{path}", path = prompt_cache::DEFAULT_CACHE_DIR)
343}
344
345fn default_max_entries() -> usize {
346    prompt_cache::DEFAULT_MAX_ENTRIES
347}
348
349fn default_max_age_days() -> u64 {
350    prompt_cache::DEFAULT_MAX_AGE_DAYS
351}
352
353fn default_auto_cleanup() -> bool {
354    prompt_cache::DEFAULT_AUTO_CLEANUP
355}
356
357fn default_min_quality_threshold() -> f64 {
358    prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD
359}
360
/// Serde default for boolean switches that are on unless configured otherwise.
fn default_true() -> bool {
    true
}
364
365fn default_openai_min_prefix_tokens() -> u32 {
366    prompt_cache::OPENAI_MIN_PREFIX_TOKENS
367}
368
369fn default_openai_idle_expiration() -> u64 {
370    prompt_cache::OPENAI_IDLE_EXPIRATION_SECONDS
371}
372
373fn default_anthropic_default_ttl() -> u64 {
374    prompt_cache::ANTHROPIC_DEFAULT_TTL_SECONDS
375}
376
377fn default_anthropic_extended_ttl() -> Option<u64> {
378    Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
379}
380
381fn default_anthropic_tools_ttl() -> u64 {
382    prompt_cache::ANTHROPIC_TOOLS_TTL_SECONDS
383}
384
385fn default_anthropic_messages_ttl() -> u64 {
386    prompt_cache::ANTHROPIC_MESSAGES_TTL_SECONDS
387}
388
389fn default_anthropic_max_breakpoints() -> u8 {
390    prompt_cache::ANTHROPIC_MAX_BREAKPOINTS
391}
392
393#[allow(dead_code)]
394fn default_min_message_length() -> usize {
395    prompt_cache::ANTHROPIC_MIN_MESSAGE_LENGTH_FOR_CACHE
396}
397
398fn default_gemini_min_prefix_tokens() -> u32 {
399    prompt_cache::GEMINI_MIN_PREFIX_TOKENS
400}
401
402fn default_gemini_explicit_ttl() -> Option<u64> {
403    Some(prompt_cache::GEMINI_EXPLICIT_DEFAULT_TTL_SECONDS)
404}
405
406fn default_gemini_mode() -> GeminiPromptCacheMode {
407    GeminiPromptCacheMode::Implicit
408}
409
410fn default_zai_enabled() -> bool {
411    prompt_cache::ZAI_CACHE_ENABLED
412}
413
414fn default_moonshot_enabled() -> bool {
415    prompt_cache::MOONSHOT_CACHE_ENABLED
416}
417
418fn resolve_path(input: &str, workspace_root: Option<&Path>) -> PathBuf {
419    let trimmed = input.trim();
420    if trimmed.is_empty() {
421        return resolve_default_cache_dir();
422    }
423
424    if let Some(stripped) = trimmed
425        .strip_prefix("~/")
426        .or_else(|| trimmed.strip_prefix("~\\"))
427    {
428        if let Some(home) = dirs::home_dir() {
429            return home.join(stripped);
430        }
431        return PathBuf::from(stripped);
432    }
433
434    let candidate = Path::new(trimmed);
435    if candidate.is_absolute() {
436        return candidate.to_path_buf();
437    }
438
439    if let Some(root) = workspace_root {
440        return root.join(candidate);
441    }
442
443    candidate.to_path_buf()
444}
445
446fn resolve_default_cache_dir() -> PathBuf {
447    if let Some(home) = dirs::home_dir() {
448        return home.join(prompt_cache::DEFAULT_CACHE_DIR);
449    }
450    PathBuf::from(prompt_cache::DEFAULT_CACHE_DIR)
451}
452
453/// Parse a duration string into a std::time::Duration
454/// Acceptable formats: <number>[s|m|h|d], e.g., "30s", "5m", "24h", "1d".
455fn parse_retention_duration(input: &str) -> anyhow::Result<Duration> {
456    let input = input.trim();
457    if input.is_empty() {
458        anyhow::bail!("Empty retention string");
459    }
460
461    // Strict format: number + unit (s|m|h|d)
462    let re = Regex::new(r"^(\d+)([smhdSMHD])$").unwrap();
463    let caps = re
464        .captures(input)
465        .ok_or_else(|| anyhow::anyhow!("Invalid retention format; use <number>[s|m|h|d]"))?;
466
467    let value_str = caps.get(1).unwrap().as_str();
468    let unit = caps
469        .get(2)
470        .unwrap()
471        .as_str()
472        .chars()
473        .next()
474        .unwrap()
475        .to_ascii_lowercase();
476    let value: u64 = value_str
477        .parse()
478        .with_context(|| format!("Invalid numeric value in retention: {}", value_str))?;
479
480    let seconds = match unit {
481        's' => value,
482        'm' => value * 60,
483        'h' => value * 60 * 60,
484        'd' => value * 24 * 60 * 60,
485        _ => anyhow::bail!("Invalid retention unit; expected s,m,h,d"),
486    };
487
488    // Enforce a reasonable retention window: at least 1s and max 30 days
489    const MIN_SECONDS: u64 = 1;
490    const MAX_SECONDS: u64 = 30 * 24 * 60 * 60; // 30 days
491    if !((MIN_SECONDS..=MAX_SECONDS).contains(&seconds)) {
492        anyhow::bail!("prompt_cache_retention must be between 1s and 30d");
493    }
494
495    Ok(Duration::from_secs(seconds))
496}
497
498impl PromptCachingConfig {
499    /// Validate prompt cache config and provider overrides
500    pub fn validate(&self) -> anyhow::Result<()> {
501        // Validate OpenAI provider settings
502        self.providers.openai.validate()?;
503        Ok(())
504    }
505}
506
#[cfg(test)]
mod tests {
    use super::*;
    use assert_fs::TempDir;

    /// Defaults produced by `Default` must agree with the shared constants.
    #[test]
    fn prompt_caching_defaults_align_with_constants() {
        let cfg = PromptCachingConfig::default();
        assert!(cfg.enabled);
        assert_eq!(cfg.max_entries, prompt_cache::DEFAULT_MAX_ENTRIES);
        assert_eq!(cfg.max_age_days, prompt_cache::DEFAULT_MAX_AGE_DAYS);
        assert!(
            (cfg.min_quality_threshold - prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD).abs()
                < f64::EPSILON
        );
        assert!(cfg.providers.openai.enabled);
        assert_eq!(
            cfg.providers.openai.min_prefix_tokens,
            prompt_cache::OPENAI_MIN_PREFIX_TOKENS
        );
        // The Anthropic settings expose separate tools/messages TTLs; there is
        // no `extended_ttl_seconds` field on `AnthropicPromptCacheSettings`.
        assert_eq!(
            cfg.providers.anthropic.tools_ttl_seconds,
            prompt_cache::ANTHROPIC_TOOLS_TTL_SECONDS
        );
        assert_eq!(
            cfg.providers.anthropic.messages_ttl_seconds,
            prompt_cache::ANTHROPIC_MESSAGES_TTL_SECONDS
        );
        assert_eq!(cfg.providers.gemini.mode, GeminiPromptCacheMode::Implicit);
        assert!(cfg.providers.moonshot.enabled);
        assert_eq!(cfg.providers.openai.prompt_cache_retention, None);
    }

    /// A `~/` prefix resolves under the home directory when one is known.
    #[test]
    fn resolve_cache_dir_expands_home() {
        let cfg = PromptCachingConfig {
            cache_dir: "~/.custom/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(None);
        if let Some(home) = dirs::home_dir() {
            assert!(resolved.starts_with(home));
        } else {
            assert_eq!(resolved, PathBuf::from(".custom/cache"));
        }
    }

    /// Relative paths are anchored at the workspace root.
    #[test]
    fn resolve_cache_dir_uses_workspace_when_relative() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();
        let cfg = PromptCachingConfig {
            cache_dir: "relative/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(Some(workspace));
        assert_eq!(resolved, workspace.join("relative/cache"));
    }

    /// Covers accepted formats plus range, format and unit rejections.
    #[test]
    fn parse_retention_duration_valid_and_invalid() {
        assert_eq!(
            parse_retention_duration("24h").unwrap(),
            std::time::Duration::from_secs(86400)
        );
        assert_eq!(
            parse_retention_duration("5m").unwrap(),
            std::time::Duration::from_secs(300)
        );
        assert_eq!(
            parse_retention_duration("1s").unwrap(),
            std::time::Duration::from_secs(1)
        );
        assert!(parse_retention_duration("0s").is_err());
        assert!(parse_retention_duration("31d").is_err());
        assert!(parse_retention_duration("abc").is_err());
        assert!(parse_retention_duration("").is_err());
        assert!(parse_retention_duration("10x").is_err());
    }

    /// An unparseable retention string must fail top-level validation.
    #[test]
    fn validate_prompt_cache_rejects_invalid_retention() {
        let mut cfg = PromptCachingConfig::default();
        cfg.providers.openai.prompt_cache_retention = Some("invalid".to_string());
        assert!(cfg.validate().is_err());
    }
}