vtcode_config/core/
prompt_cache.rs

1use crate::constants::prompt_cache;
2use anyhow::Context;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7
8/// Global prompt caching configuration loaded from vtcode.toml
9#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
10#[derive(Debug, Clone, Deserialize, Serialize)]
11pub struct PromptCachingConfig {
12    /// Enable prompt caching features globally
13    #[serde(default = "default_enabled")]
14    pub enabled: bool,
15
16    /// Base directory for local prompt cache storage (supports `~` expansion)
17    #[serde(default = "default_cache_dir")]
18    pub cache_dir: String,
19
20    /// Maximum number of cached prompt entries to retain on disk
21    #[serde(default = "default_max_entries")]
22    pub max_entries: usize,
23
24    /// Maximum age (in days) before cached entries are purged
25    #[serde(default = "default_max_age_days")]
26    pub max_age_days: u64,
27
28    /// Automatically evict stale entries on startup/shutdown
29    #[serde(default = "default_auto_cleanup")]
30    pub enable_auto_cleanup: bool,
31
32    /// Minimum quality score required before persisting an entry
33    #[serde(default = "default_min_quality_threshold")]
34    pub min_quality_threshold: f64,
35
36    /// Provider specific overrides
37    #[serde(default)]
38    pub providers: ProviderPromptCachingConfig,
39}
40
41impl Default for PromptCachingConfig {
42    fn default() -> Self {
43        Self {
44            enabled: default_enabled(),
45            cache_dir: default_cache_dir(),
46            max_entries: default_max_entries(),
47            max_age_days: default_max_age_days(),
48            enable_auto_cleanup: default_auto_cleanup(),
49            min_quality_threshold: default_min_quality_threshold(),
50            providers: ProviderPromptCachingConfig::default(),
51        }
52    }
53}
54
55impl PromptCachingConfig {
56    /// Resolve the configured cache directory to an absolute path
57    ///
58    /// - `~` is expanded to the user's home directory when available
59    /// - Relative paths are resolved against the provided workspace root when supplied
60    /// - Falls back to the configured string when neither applies
61    pub fn resolve_cache_dir(&self, workspace_root: Option<&Path>) -> PathBuf {
62        resolve_path(&self.cache_dir, workspace_root)
63    }
64}
65
66/// Per-provider configuration overrides
67#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
68#[derive(Debug, Clone, Deserialize, Serialize, Default)]
69pub struct ProviderPromptCachingConfig {
70    #[serde(default = "OpenAIPromptCacheSettings::default")]
71    pub openai: OpenAIPromptCacheSettings,
72
73    #[serde(default = "AnthropicPromptCacheSettings::default")]
74    pub anthropic: AnthropicPromptCacheSettings,
75
76    #[serde(default = "GeminiPromptCacheSettings::default")]
77    pub gemini: GeminiPromptCacheSettings,
78
79    #[serde(default = "OpenRouterPromptCacheSettings::default")]
80    pub openrouter: OpenRouterPromptCacheSettings,
81
82    #[serde(default = "MoonshotPromptCacheSettings::default")]
83    pub moonshot: MoonshotPromptCacheSettings,
84
85    #[serde(default = "XAIPromptCacheSettings::default")]
86    pub xai: XAIPromptCacheSettings,
87
88    #[serde(default = "DeepSeekPromptCacheSettings::default")]
89    pub deepseek: DeepSeekPromptCacheSettings,
90
91    #[serde(default = "ZaiPromptCacheSettings::default")]
92    pub zai: ZaiPromptCacheSettings,
93}
94
95/// OpenAI prompt caching controls (automatic with metrics)
96#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
97#[derive(Debug, Clone, Deserialize, Serialize)]
98pub struct OpenAIPromptCacheSettings {
99    #[serde(default = "default_true")]
100    pub enabled: bool,
101
102    #[serde(default = "default_openai_min_prefix_tokens")]
103    pub min_prefix_tokens: u32,
104
105    #[serde(default = "default_openai_idle_expiration")]
106    pub idle_expiration_seconds: u64,
107
108    #[serde(default = "default_true")]
109    pub surface_metrics: bool,
110
111    /// Optional prompt cache retention string to pass directly into OpenAI Responses API
112    /// Example: "24h" or "1d". If set, VT Code will include `prompt_cache_retention`
113    /// in the request body to extend the model-side prompt caching window.
114    #[serde(default)]
115    pub prompt_cache_retention: Option<String>,
116}
117
118impl Default for OpenAIPromptCacheSettings {
119    fn default() -> Self {
120        Self {
121            enabled: default_true(),
122            min_prefix_tokens: default_openai_min_prefix_tokens(),
123            idle_expiration_seconds: default_openai_idle_expiration(),
124            surface_metrics: default_true(),
125            prompt_cache_retention: None,
126        }
127    }
128}
129
130impl OpenAIPromptCacheSettings {
131    /// Validate OpenAI provider prompt cache settings. Returns Err if the retention value is invalid.
132    pub fn validate(&self) -> anyhow::Result<()> {
133        if let Some(ref retention) = self.prompt_cache_retention {
134            parse_retention_duration(retention)
135                .with_context(|| format!("Invalid prompt_cache_retention: {}", retention))?;
136        }
137        Ok(())
138    }
139}
140
141/// Anthropic Claude cache control settings
142#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
143#[derive(Debug, Clone, Deserialize, Serialize)]
144pub struct AnthropicPromptCacheSettings {
145    #[serde(default = "default_true")]
146    pub enabled: bool,
147
148    /// Default TTL in seconds for the first cache breakpoint (tools/system).
149    /// Anthropic only supports "5m" (300s) or "1h" (3600s) TTL formats.
150    /// Set to >= 3600 for 1-hour cache on tools and system prompts.
151    /// Default: 3600 (1 hour) - recommended for stable tool definitions
152    #[serde(default = "default_anthropic_tools_ttl")]
153    pub tools_ttl_seconds: u64,
154
155    /// TTL for subsequent cache breakpoints (messages).
156    /// Set to >= 3600 for 1-hour cache on messages.
157    /// Default: 300 (5 minutes) - recommended for frequently changing messages
158    #[serde(default = "default_anthropic_messages_ttl")]
159    pub messages_ttl_seconds: u64,
160
161    /// Maximum number of cache breakpoints to use (max 4 per Anthropic spec).
162    /// Default: 4
163    #[serde(default = "default_anthropic_max_breakpoints")]
164    pub max_breakpoints: u8,
165
166    /// Apply cache control to system prompts by default
167    #[serde(default = "default_true")]
168    pub cache_system_messages: bool,
169
170    /// Apply cache control to user messages exceeding threshold
171    #[serde(default = "default_true")]
172    pub cache_user_messages: bool,
173
174    /// Apply cache control to tool definitions by default
175    /// Default: true (tools are typically stable and benefit from longer caching)
176    #[serde(default = "default_true")]
177    pub cache_tool_definitions: bool,
178
179    /// Minimum message length (in characters) before applying cache control
180    /// to avoid caching very short messages that don't benefit from caching.
181    /// Default: 256 characters (~64 tokens)
182    #[serde(default = "default_min_message_length")]
183    pub min_message_length_for_cache: usize,
184
185    /// Extended TTL for Anthropic prompt caching (in seconds)
186    /// Set to >= 3600 for 1-hour cache on messages
187    #[serde(default = "default_anthropic_extended_ttl")]
188    pub extended_ttl_seconds: Option<u64>,
189}
190
191impl Default for AnthropicPromptCacheSettings {
192    fn default() -> Self {
193        Self {
194            enabled: default_true(),
195            tools_ttl_seconds: default_anthropic_tools_ttl(),
196            messages_ttl_seconds: default_anthropic_messages_ttl(),
197            max_breakpoints: default_anthropic_max_breakpoints(),
198            cache_system_messages: default_true(),
199            cache_user_messages: default_true(),
200            cache_tool_definitions: default_true(),
201            min_message_length_for_cache: default_min_message_length(),
202            extended_ttl_seconds: default_anthropic_extended_ttl(),
203        }
204    }
205}
206
207/// Gemini API caching preferences
208#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
209#[derive(Debug, Clone, Deserialize, Serialize)]
210pub struct GeminiPromptCacheSettings {
211    #[serde(default = "default_true")]
212    pub enabled: bool,
213
214    #[serde(default = "default_gemini_mode")]
215    pub mode: GeminiPromptCacheMode,
216
217    #[serde(default = "default_gemini_min_prefix_tokens")]
218    pub min_prefix_tokens: u32,
219
220    /// TTL for explicit caches (ignored in implicit mode)
221    #[serde(default = "default_gemini_explicit_ttl")]
222    pub explicit_ttl_seconds: Option<u64>,
223}
224
225impl Default for GeminiPromptCacheSettings {
226    fn default() -> Self {
227        Self {
228            enabled: default_true(),
229            mode: GeminiPromptCacheMode::default(),
230            min_prefix_tokens: default_gemini_min_prefix_tokens(),
231            explicit_ttl_seconds: default_gemini_explicit_ttl(),
232        }
233    }
234}
235
236/// Gemini prompt caching mode selection
237#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
238#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
239#[serde(rename_all = "snake_case")]
240#[derive(Default)]
241pub enum GeminiPromptCacheMode {
242    #[default]
243    Implicit,
244    Explicit,
245    Off,
246}
247
248/// OpenRouter passthrough caching controls
249#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
250#[derive(Debug, Clone, Deserialize, Serialize)]
251pub struct OpenRouterPromptCacheSettings {
252    #[serde(default = "default_true")]
253    pub enabled: bool,
254
255    /// Propagate provider cache instructions automatically
256    #[serde(default = "default_true")]
257    pub propagate_provider_capabilities: bool,
258
259    /// Surface cache savings reported by OpenRouter
260    #[serde(default = "default_true")]
261    pub report_savings: bool,
262}
263
264impl Default for OpenRouterPromptCacheSettings {
265    fn default() -> Self {
266        Self {
267            enabled: default_true(),
268            propagate_provider_capabilities: default_true(),
269            report_savings: default_true(),
270        }
271    }
272}
273
274/// Moonshot prompt caching configuration (leverages server-side reuse)
275#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
276#[derive(Debug, Clone, Deserialize, Serialize)]
277pub struct MoonshotPromptCacheSettings {
278    #[serde(default = "default_moonshot_enabled")]
279    pub enabled: bool,
280}
281
282impl Default for MoonshotPromptCacheSettings {
283    fn default() -> Self {
284        Self {
285            enabled: default_moonshot_enabled(),
286        }
287    }
288}
289
290/// xAI prompt caching configuration (automatic platform-level cache)
291#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
292#[derive(Debug, Clone, Deserialize, Serialize)]
293pub struct XAIPromptCacheSettings {
294    #[serde(default = "default_true")]
295    pub enabled: bool,
296}
297
298impl Default for XAIPromptCacheSettings {
299    fn default() -> Self {
300        Self {
301            enabled: default_true(),
302        }
303    }
304}
305
306/// DeepSeek prompt caching configuration (automatic KV cache reuse)
307#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
308#[derive(Debug, Clone, Deserialize, Serialize)]
309pub struct DeepSeekPromptCacheSettings {
310    #[serde(default = "default_true")]
311    pub enabled: bool,
312
313    /// Emit cache hit/miss metrics from responses when available
314    #[serde(default = "default_true")]
315    pub surface_metrics: bool,
316}
317
318impl Default for DeepSeekPromptCacheSettings {
319    fn default() -> Self {
320        Self {
321            enabled: default_true(),
322            surface_metrics: default_true(),
323        }
324    }
325}
326
327/// Z.AI prompt caching configuration (disabled until platform exposes metrics)
328#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
329#[derive(Debug, Clone, Deserialize, Serialize)]
330pub struct ZaiPromptCacheSettings {
331    #[serde(default = "default_zai_enabled")]
332    pub enabled: bool,
333}
334
335impl Default for ZaiPromptCacheSettings {
336    fn default() -> Self {
337        Self {
338            enabled: default_zai_enabled(),
339        }
340    }
341}
342
343fn default_enabled() -> bool {
344    prompt_cache::DEFAULT_ENABLED
345}
346
347fn default_cache_dir() -> String {
348    format!("~/{path}", path = prompt_cache::DEFAULT_CACHE_DIR)
349}
350
351fn default_max_entries() -> usize {
352    prompt_cache::DEFAULT_MAX_ENTRIES
353}
354
355fn default_max_age_days() -> u64 {
356    prompt_cache::DEFAULT_MAX_AGE_DAYS
357}
358
359fn default_auto_cleanup() -> bool {
360    prompt_cache::DEFAULT_AUTO_CLEANUP
361}
362
363fn default_min_quality_threshold() -> f64 {
364    prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD
365}
366
/// Shared serde default for boolean switches that are on unless configured
/// otherwise.
fn default_true() -> bool {
    true
}
370
371fn default_openai_min_prefix_tokens() -> u32 {
372    prompt_cache::OPENAI_MIN_PREFIX_TOKENS
373}
374
375fn default_openai_idle_expiration() -> u64 {
376    prompt_cache::OPENAI_IDLE_EXPIRATION_SECONDS
377}
378
379#[allow(dead_code)]
380fn default_anthropic_default_ttl() -> u64 {
381    prompt_cache::ANTHROPIC_DEFAULT_TTL_SECONDS
382}
383
384#[allow(dead_code)]
385fn default_anthropic_extended_ttl() -> Option<u64> {
386    Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
387}
388
389fn default_anthropic_tools_ttl() -> u64 {
390    prompt_cache::ANTHROPIC_TOOLS_TTL_SECONDS
391}
392
393fn default_anthropic_messages_ttl() -> u64 {
394    prompt_cache::ANTHROPIC_MESSAGES_TTL_SECONDS
395}
396
397fn default_anthropic_max_breakpoints() -> u8 {
398    prompt_cache::ANTHROPIC_MAX_BREAKPOINTS
399}
400
401#[allow(dead_code)]
402fn default_min_message_length() -> usize {
403    prompt_cache::ANTHROPIC_MIN_MESSAGE_LENGTH_FOR_CACHE
404}
405
406fn default_gemini_min_prefix_tokens() -> u32 {
407    prompt_cache::GEMINI_MIN_PREFIX_TOKENS
408}
409
410fn default_gemini_explicit_ttl() -> Option<u64> {
411    Some(prompt_cache::GEMINI_EXPLICIT_DEFAULT_TTL_SECONDS)
412}
413
414fn default_gemini_mode() -> GeminiPromptCacheMode {
415    GeminiPromptCacheMode::Implicit
416}
417
418fn default_zai_enabled() -> bool {
419    prompt_cache::ZAI_CACHE_ENABLED
420}
421
422fn default_moonshot_enabled() -> bool {
423    prompt_cache::MOONSHOT_CACHE_ENABLED
424}
425
426fn resolve_path(input: &str, workspace_root: Option<&Path>) -> PathBuf {
427    let trimmed = input.trim();
428    if trimmed.is_empty() {
429        return resolve_default_cache_dir();
430    }
431
432    if let Some(stripped) = trimmed
433        .strip_prefix("~/")
434        .or_else(|| trimmed.strip_prefix("~\\"))
435    {
436        if let Some(home) = dirs::home_dir() {
437            return home.join(stripped);
438        }
439        return PathBuf::from(stripped);
440    }
441
442    let candidate = Path::new(trimmed);
443    if candidate.is_absolute() {
444        return candidate.to_path_buf();
445    }
446
447    if let Some(root) = workspace_root {
448        return root.join(candidate);
449    }
450
451    candidate.to_path_buf()
452}
453
454fn resolve_default_cache_dir() -> PathBuf {
455    if let Some(home) = dirs::home_dir() {
456        return home.join(prompt_cache::DEFAULT_CACHE_DIR);
457    }
458    PathBuf::from(prompt_cache::DEFAULT_CACHE_DIR)
459}
460
461/// Parse a duration string into a std::time::Duration
462/// Acceptable formats: <number>[s|m|h|d], e.g., "30s", "5m", "24h", "1d".
463fn parse_retention_duration(input: &str) -> anyhow::Result<Duration> {
464    let input = input.trim();
465    if input.is_empty() {
466        anyhow::bail!("Empty retention string");
467    }
468
469    // Strict format: number + unit (s|m|h|d)
470    let re = Regex::new(r"^(\d+)([smhdSMHD])$").unwrap();
471    let caps = re
472        .captures(input)
473        .ok_or_else(|| anyhow::anyhow!("Invalid retention format; use <number>[s|m|h|d]"))?;
474
475    let value_str = caps.get(1).unwrap().as_str();
476    let unit = caps
477        .get(2)
478        .unwrap()
479        .as_str()
480        .chars()
481        .next()
482        .unwrap()
483        .to_ascii_lowercase();
484    let value: u64 = value_str
485        .parse()
486        .with_context(|| format!("Invalid numeric value in retention: {}", value_str))?;
487
488    let seconds = match unit {
489        's' => value,
490        'm' => value * 60,
491        'h' => value * 60 * 60,
492        'd' => value * 24 * 60 * 60,
493        _ => anyhow::bail!("Invalid retention unit; expected s,m,h,d"),
494    };
495
496    // Enforce a reasonable retention window: at least 1s and max 30 days
497    const MIN_SECONDS: u64 = 1;
498    const MAX_SECONDS: u64 = 30 * 24 * 60 * 60; // 30 days
499    if !((MIN_SECONDS..=MAX_SECONDS).contains(&seconds)) {
500        anyhow::bail!("prompt_cache_retention must be between 1s and 30d");
501    }
502
503    Ok(Duration::from_secs(seconds))
504}
505
506impl PromptCachingConfig {
507    /// Validate prompt cache config and provider overrides
508    pub fn validate(&self) -> anyhow::Result<()> {
509        // Validate OpenAI provider settings
510        self.providers.openai.validate()?;
511        Ok(())
512    }
513}
514
#[cfg(test)]
mod tests {
    use super::*;
    use assert_fs::TempDir;

    /// Default configuration with only `cache_dir` replaced.
    fn config_with_cache_dir(cache_dir: &str) -> PromptCachingConfig {
        PromptCachingConfig {
            cache_dir: cache_dir.to_string(),
            ..PromptCachingConfig::default()
        }
    }

    #[test]
    fn prompt_caching_defaults_align_with_constants() {
        let cfg = PromptCachingConfig::default();

        // Global settings.
        assert!(cfg.enabled);
        assert_eq!(cfg.max_entries, prompt_cache::DEFAULT_MAX_ENTRIES);
        assert_eq!(cfg.max_age_days, prompt_cache::DEFAULT_MAX_AGE_DAYS);
        let threshold_delta =
            (cfg.min_quality_threshold - prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD).abs();
        assert!(threshold_delta < f64::EPSILON);

        // Provider overrides.
        let providers = &cfg.providers;
        assert!(providers.openai.enabled);
        assert_eq!(
            providers.openai.min_prefix_tokens,
            prompt_cache::OPENAI_MIN_PREFIX_TOKENS
        );
        assert_eq!(
            providers.anthropic.extended_ttl_seconds,
            Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
        );
        assert_eq!(providers.gemini.mode, GeminiPromptCacheMode::Implicit);
        assert!(providers.moonshot.enabled);
        assert_eq!(providers.openai.prompt_cache_retention, None);
    }

    #[test]
    fn resolve_cache_dir_expands_home() {
        let resolved = config_with_cache_dir("~/.custom/cache").resolve_cache_dir(None);
        match dirs::home_dir() {
            Some(home) => assert!(resolved.starts_with(home)),
            // Without a home directory the `~/` prefix is simply dropped.
            None => assert_eq!(resolved, PathBuf::from(".custom/cache")),
        }
    }

    #[test]
    fn resolve_cache_dir_uses_workspace_when_relative() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();
        let resolved = config_with_cache_dir("relative/cache").resolve_cache_dir(Some(workspace));
        assert_eq!(resolved, workspace.join("relative/cache"));
    }

    #[test]
    fn parse_retention_duration_valid_and_invalid() {
        let accepted: &[(&str, u64)] = &[("24h", 86400), ("5m", 300), ("1s", 1)];
        for &(text, expected_secs) in accepted {
            assert_eq!(
                parse_retention_duration(text).unwrap(),
                std::time::Duration::from_secs(expected_secs)
            );
        }

        let rejected: &[&str] = &["0s", "31d", "abc", "", "10x"];
        for &text in rejected {
            assert!(parse_retention_duration(text).is_err());
        }
    }

    #[test]
    fn validate_prompt_cache_rejects_invalid_retention() {
        let mut cfg = PromptCachingConfig::default();
        cfg.providers.openai.prompt_cache_retention = Some("invalid".to_string());
        assert!(cfg.validate().is_err());
    }
}