vtcode_config/core/prompt_cache.rs

use crate::constants::prompt_cache;
use anyhow::Context;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::time::Duration;

/// Global prompt caching configuration loaded from vtcode.toml
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct PromptCachingConfig {
    /// Enable prompt caching features globally
    #[serde(default = "default_enabled")]
    pub enabled: bool,

    /// Base directory for local prompt cache storage (supports `~` expansion)
    #[serde(default = "default_cache_dir")]
    pub cache_dir: String,

    /// Maximum number of cached prompt entries to retain on disk
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,

    /// Maximum age (in days) before cached entries are purged
    #[serde(default = "default_max_age_days")]
    pub max_age_days: u64,

    /// Automatically evict stale entries on startup/shutdown
    #[serde(default = "default_auto_cleanup")]
    pub enable_auto_cleanup: bool,

    /// Minimum quality score required before persisting an entry
    #[serde(default = "default_min_quality_threshold")]
    pub min_quality_threshold: f64,

    /// Provider-specific overrides
    #[serde(default)]
    pub providers: ProviderPromptCachingConfig,
}

impl Default for PromptCachingConfig {
    fn default() -> Self {
        Self {
            enabled: default_enabled(),
            cache_dir: default_cache_dir(),
            max_entries: default_max_entries(),
            max_age_days: default_max_age_days(),
            enable_auto_cleanup: default_auto_cleanup(),
            min_quality_threshold: default_min_quality_threshold(),
            providers: ProviderPromptCachingConfig::default(),
        }
    }
}
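
// Illustrative only: a vtcode.toml fragment that would deserialize into this
// struct. Field names come from the serde definitions above; the table path
// (`[prompt_cache]`) and the values shown are placeholder assumptions, not
// necessarily the shipped defaults.
//
// [prompt_cache]
// enabled = true
// cache_dir = "~/.vtcode/cache"
// max_entries = 1000
// max_age_days = 30
// enable_auto_cleanup = true
// min_quality_threshold = 0.7
//
// [prompt_cache.providers.openai]
// prompt_cache_retention = "24h"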

impl PromptCachingConfig {
    /// Resolve the configured cache directory to an absolute path
    ///
    /// - `~` is expanded to the user's home directory when available
    /// - Relative paths are resolved against the provided workspace root when supplied
    /// - Falls back to the configured string when neither applies
    pub fn resolve_cache_dir(&self, workspace_root: Option<&Path>) -> PathBuf {
        resolve_path(&self.cache_dir, workspace_root)
    }
}
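
// Resolution behaviour at a glance (illustrative; the executable cases live in
// the unit tests at the bottom of this file):
//
//   "~/.custom/cache"  -> <home>/.custom/cache            when a home directory exists
//   "relative/cache"   -> <workspace_root>/relative/cache when a workspace root is supplied
//   "/var/cache/vt"    -> /var/cache/vt                   absolute paths pass through unchanged
//   ""                 -> the built-in default cache directory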

/// Per-provider configuration overrides
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
pub struct ProviderPromptCachingConfig {
    #[serde(default = "OpenAIPromptCacheSettings::default")]
    pub openai: OpenAIPromptCacheSettings,

    #[serde(default = "AnthropicPromptCacheSettings::default")]
    pub anthropic: AnthropicPromptCacheSettings,

    #[serde(default = "GeminiPromptCacheSettings::default")]
    pub gemini: GeminiPromptCacheSettings,

    #[serde(default = "OpenRouterPromptCacheSettings::default")]
    pub openrouter: OpenRouterPromptCacheSettings,

    #[serde(default = "MoonshotPromptCacheSettings::default")]
    pub moonshot: MoonshotPromptCacheSettings,

    #[serde(default = "XAIPromptCacheSettings::default")]
    pub xai: XAIPromptCacheSettings,

    #[serde(default = "DeepSeekPromptCacheSettings::default")]
    pub deepseek: DeepSeekPromptCacheSettings,

    #[serde(default = "ZaiPromptCacheSettings::default")]
    pub zai: ZaiPromptCacheSettings,
}

/// OpenAI prompt caching controls (automatic with metrics)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OpenAIPromptCacheSettings {
    #[serde(default = "default_true")]
    pub enabled: bool,

    #[serde(default = "default_openai_min_prefix_tokens")]
    pub min_prefix_tokens: u32,

    #[serde(default = "default_openai_idle_expiration")]
    pub idle_expiration_seconds: u64,

    #[serde(default = "default_true")]
    pub surface_metrics: bool,

    /// Optional prompt cache retention string to pass directly into the OpenAI Responses API.
    /// Example: "24h" or "1d". If set, VT Code will include `prompt_cache_retention`
    /// in the request body to extend the model-side prompt caching window.
    #[serde(default)]
    pub prompt_cache_retention: Option<String>,
}

impl Default for OpenAIPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_true(),
            min_prefix_tokens: default_openai_min_prefix_tokens(),
            idle_expiration_seconds: default_openai_idle_expiration(),
            surface_metrics: default_true(),
            prompt_cache_retention: None,
        }
    }
}

impl OpenAIPromptCacheSettings {
    /// Validate OpenAI provider prompt cache settings. Returns `Err` if the retention value is invalid.
    pub fn validate(&self) -> anyhow::Result<()> {
        if let Some(ref retention) = self.prompt_cache_retention {
            parse_retention_duration(retention)
                .with_context(|| format!("Invalid prompt_cache_retention: {}", retention))?;
        }
        Ok(())
    }
}
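
// Example (informal): "24h" and "1d" both pass validation, while strings such
// as "2w", "0s", or "31d" are rejected by `parse_retention_duration` below and
// surface here as configuration errors.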

/// Anthropic Claude cache control settings
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AnthropicPromptCacheSettings {
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Default TTL in seconds (maps to "5m" in the API).
    /// Default: 300 seconds (5 minutes)
    /// Note: Anthropic only supports "5m" or "1h" TTL formats.
    #[serde(default = "default_anthropic_default_ttl")]
    pub default_ttl_seconds: u64,

    /// Optional extended TTL in seconds (maps to "1h" in the API if >= 3600).
    /// Set to 3600 or higher to use 1-hour caching.
    /// Set to None or < 3600 to use default 5-minute caching.
    /// Default: Some(3600) for 1-hour caching
    /// Note: Using 1h requires the "extended-cache-ttl-2025-04-11" beta header.
    #[serde(default = "default_anthropic_extended_ttl")]
    pub extended_ttl_seconds: Option<u64>,

    /// Maximum number of cache breakpoints to use (max 4 per Anthropic spec).
    /// Default: 4
    #[serde(default = "default_anthropic_max_breakpoints")]
    pub max_breakpoints: u8,

    /// Apply cache control to system prompts by default
    #[serde(default = "default_true")]
    pub cache_system_messages: bool,

    /// Apply cache control to user messages exceeding threshold
    #[serde(default = "default_true")]
    pub cache_user_messages: bool,
}

impl Default for AnthropicPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_true(),
            default_ttl_seconds: default_anthropic_default_ttl(),
            extended_ttl_seconds: default_anthropic_extended_ttl(),
            max_breakpoints: default_anthropic_max_breakpoints(),
            cache_system_messages: default_true(),
            cache_user_messages: default_true(),
        }
    }
}
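
// Mapping sketch, per the field docs above: `default_ttl_seconds` corresponds
// to the API's "5m" cache-control TTL, while an `extended_ttl_seconds` of
// Some(3600) or more selects the "1h" TTL (which also needs the
// "extended-cache-ttl-2025-04-11" beta header). The conversion itself is done
// outside this config module, presumably in the Anthropic provider layer.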

/// Gemini API caching preferences
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GeminiPromptCacheSettings {
    #[serde(default = "default_true")]
    pub enabled: bool,

    #[serde(default = "default_gemini_mode")]
    pub mode: GeminiPromptCacheMode,

    #[serde(default = "default_gemini_min_prefix_tokens")]
    pub min_prefix_tokens: u32,

    /// TTL for explicit caches (ignored in implicit mode)
    #[serde(default = "default_gemini_explicit_ttl")]
    pub explicit_ttl_seconds: Option<u64>,
}

impl Default for GeminiPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_true(),
            mode: GeminiPromptCacheMode::default(),
            min_prefix_tokens: default_gemini_min_prefix_tokens(),
            explicit_ttl_seconds: default_gemini_explicit_ttl(),
        }
    }
}

/// Gemini prompt caching mode selection
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum GeminiPromptCacheMode {
    #[default]
    Implicit,
    Explicit,
    Off,
}
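
// Because of `rename_all = "snake_case"`, the mode is spelled "implicit",
// "explicit", or "off" in vtcode.toml; `Implicit` is the default variant.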

/// OpenRouter passthrough caching controls
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OpenRouterPromptCacheSettings {
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Propagate provider cache instructions automatically
    #[serde(default = "default_true")]
    pub propagate_provider_capabilities: bool,

    /// Surface cache savings reported by OpenRouter
    #[serde(default = "default_true")]
    pub report_savings: bool,
}

impl Default for OpenRouterPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_true(),
            propagate_provider_capabilities: default_true(),
            report_savings: default_true(),
        }
    }
}

/// Moonshot prompt caching configuration (leverages server-side reuse)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MoonshotPromptCacheSettings {
    #[serde(default = "default_moonshot_enabled")]
    pub enabled: bool,
}

impl Default for MoonshotPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_moonshot_enabled(),
        }
    }
}

/// xAI prompt caching configuration (automatic platform-level cache)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct XAIPromptCacheSettings {
    #[serde(default = "default_true")]
    pub enabled: bool,
}

impl Default for XAIPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_true(),
        }
    }
}

/// DeepSeek prompt caching configuration (automatic KV cache reuse)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DeepSeekPromptCacheSettings {
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Emit cache hit/miss metrics from responses when available
    #[serde(default = "default_true")]
    pub surface_metrics: bool,
}

impl Default for DeepSeekPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_true(),
            surface_metrics: default_true(),
        }
    }
}

/// Z.AI prompt caching configuration (disabled until the platform exposes metrics)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ZaiPromptCacheSettings {
    #[serde(default = "default_zai_enabled")]
    pub enabled: bool,
}

impl Default for ZaiPromptCacheSettings {
    fn default() -> Self {
        Self {
            enabled: default_zai_enabled(),
        }
    }
}

fn default_enabled() -> bool {
    prompt_cache::DEFAULT_ENABLED
}

fn default_cache_dir() -> String {
    format!("~/{path}", path = prompt_cache::DEFAULT_CACHE_DIR)
}

fn default_max_entries() -> usize {
    prompt_cache::DEFAULT_MAX_ENTRIES
}

fn default_max_age_days() -> u64 {
    prompt_cache::DEFAULT_MAX_AGE_DAYS
}

fn default_auto_cleanup() -> bool {
    prompt_cache::DEFAULT_AUTO_CLEANUP
}

fn default_min_quality_threshold() -> f64 {
    prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD
}

fn default_true() -> bool {
    true
}

fn default_openai_min_prefix_tokens() -> u32 {
    prompt_cache::OPENAI_MIN_PREFIX_TOKENS
}

fn default_openai_idle_expiration() -> u64 {
    prompt_cache::OPENAI_IDLE_EXPIRATION_SECONDS
}

fn default_anthropic_default_ttl() -> u64 {
    prompt_cache::ANTHROPIC_DEFAULT_TTL_SECONDS
}

fn default_anthropic_extended_ttl() -> Option<u64> {
    Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
}

fn default_anthropic_max_breakpoints() -> u8 {
    prompt_cache::ANTHROPIC_MAX_BREAKPOINTS
}

fn default_gemini_min_prefix_tokens() -> u32 {
    prompt_cache::GEMINI_MIN_PREFIX_TOKENS
}

fn default_gemini_explicit_ttl() -> Option<u64> {
    Some(prompt_cache::GEMINI_EXPLICIT_DEFAULT_TTL_SECONDS)
}

fn default_gemini_mode() -> GeminiPromptCacheMode {
    GeminiPromptCacheMode::Implicit
}

fn default_zai_enabled() -> bool {
    prompt_cache::ZAI_CACHE_ENABLED
}

fn default_moonshot_enabled() -> bool {
    prompt_cache::MOONSHOT_CACHE_ENABLED
}

fn resolve_path(input: &str, workspace_root: Option<&Path>) -> PathBuf {
    let trimmed = input.trim();
    if trimmed.is_empty() {
        return resolve_default_cache_dir();
    }

    if let Some(stripped) = trimmed
        .strip_prefix("~/")
        .or_else(|| trimmed.strip_prefix("~\\"))
    {
        if let Some(home) = dirs::home_dir() {
            return home.join(stripped);
        }
        return PathBuf::from(stripped);
    }

    let candidate = Path::new(trimmed);
    if candidate.is_absolute() {
        return candidate.to_path_buf();
    }

    if let Some(root) = workspace_root {
        return root.join(candidate);
    }

    candidate.to_path_buf()
}

fn resolve_default_cache_dir() -> PathBuf {
    if let Some(home) = dirs::home_dir() {
        return home.join(prompt_cache::DEFAULT_CACHE_DIR);
    }
    PathBuf::from(prompt_cache::DEFAULT_CACHE_DIR)
}

/// Parse a duration string into a std::time::Duration
/// Acceptable formats: `<number>[s|m|h|d]`, e.g., "30s", "5m", "24h", "1d".
fn parse_retention_duration(input: &str) -> anyhow::Result<Duration> {
    let input = input.trim();
    if input.is_empty() {
        anyhow::bail!("Empty retention string");
    }

    // Strict format: number + unit (s|m|h|d)
    let re = Regex::new(r"^(\d+)([smhdSMHD])$").unwrap();
    let caps = re
        .captures(input)
        .ok_or_else(|| anyhow::anyhow!("Invalid retention format; use <number>[s|m|h|d]"))?;

    let value_str = caps.get(1).unwrap().as_str();
    let unit = caps
        .get(2)
        .unwrap()
        .as_str()
        .chars()
        .next()
        .unwrap()
        .to_ascii_lowercase();
    let value: u64 = value_str
        .parse()
        .with_context(|| format!("Invalid numeric value in retention: {}", value_str))?;

    let seconds = match unit {
        's' => value,
        'm' => value * 60,
        'h' => value * 60 * 60,
        'd' => value * 24 * 60 * 60,
        _ => anyhow::bail!("Invalid retention unit; expected s,m,h,d"),
    };

    // Enforce a reasonable retention window: at least 1s and max 30 days
    const MIN_SECONDS: u64 = 1;
    const MAX_SECONDS: u64 = 30 * 24 * 60 * 60; // 30 days
    if !(MIN_SECONDS..=MAX_SECONDS).contains(&seconds) {
        anyhow::bail!("prompt_cache_retention must be between 1s and 30d");
    }

    Ok(Duration::from_secs(seconds))
}

impl PromptCachingConfig {
    /// Validate prompt cache config and provider overrides
    pub fn validate(&self) -> anyhow::Result<()> {
        // Validate OpenAI provider settings
        self.providers.openai.validate()?;
        Ok(())
    }
}
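
// Note (assumption about call sites): running `validate()` once after loading
// vtcode.toml reports an invalid `prompt_cache_retention` string at startup
// instead of when the first provider request is built.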

#[cfg(test)]
mod tests {
    use super::*;
    use assert_fs::TempDir;

    #[test]
    fn prompt_caching_defaults_align_with_constants() {
        let cfg = PromptCachingConfig::default();
        assert!(cfg.enabled);
        assert_eq!(cfg.max_entries, prompt_cache::DEFAULT_MAX_ENTRIES);
        assert_eq!(cfg.max_age_days, prompt_cache::DEFAULT_MAX_AGE_DAYS);
        assert!(
            (cfg.min_quality_threshold - prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD).abs()
                < f64::EPSILON
        );
        assert!(cfg.providers.openai.enabled);
        assert_eq!(
            cfg.providers.openai.min_prefix_tokens,
            prompt_cache::OPENAI_MIN_PREFIX_TOKENS
        );
        assert_eq!(
            cfg.providers.anthropic.extended_ttl_seconds,
            Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
        );
        assert_eq!(cfg.providers.gemini.mode, GeminiPromptCacheMode::Implicit);
        assert!(cfg.providers.moonshot.enabled);
        assert_eq!(cfg.providers.openai.prompt_cache_retention, None);
    }

    #[test]
    fn resolve_cache_dir_expands_home() {
        let cfg = PromptCachingConfig {
            cache_dir: "~/.custom/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(None);
        if let Some(home) = dirs::home_dir() {
            assert!(resolved.starts_with(home));
        } else {
            assert_eq!(resolved, PathBuf::from(".custom/cache"));
        }
    }

    #[test]
    fn resolve_cache_dir_uses_workspace_when_relative() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();
        let cfg = PromptCachingConfig {
            cache_dir: "relative/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(Some(workspace));
        assert_eq!(resolved, workspace.join("relative/cache"));
    }

    #[test]
    fn parse_retention_duration_valid_and_invalid() {
        assert_eq!(
            parse_retention_duration("24h").unwrap(),
            std::time::Duration::from_secs(86400)
        );
        assert_eq!(
            parse_retention_duration("5m").unwrap(),
            std::time::Duration::from_secs(300)
        );
        assert_eq!(
            parse_retention_duration("1s").unwrap(),
            std::time::Duration::from_secs(1)
        );
        assert!(parse_retention_duration("0s").is_err());
        assert!(parse_retention_duration("31d").is_err());
        assert!(parse_retention_duration("abc").is_err());
        assert!(parse_retention_duration("").is_err());
        assert!(parse_retention_duration("10x").is_err());
    }

    #[test]
    fn validate_prompt_cache_rejects_invalid_retention() {
        let mut cfg = PromptCachingConfig::default();
        cfg.providers.openai.prompt_cache_retention = Some("invalid".to_string());
        assert!(cfg.validate().is_err());
    }
}