//! Prompt caching configuration (`vtcode_config/core/prompt_cache.rs`).
1use crate::constants::prompt_cache;
2use anyhow::Context;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7
/// Global prompt caching configuration loaded from vtcode.toml
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct PromptCachingConfig {
    /// Enable prompt caching features globally
    #[serde(default = "default_enabled")]
    pub enabled: bool,

    /// Base directory for local prompt cache storage (supports `~` expansion)
    #[serde(default = "default_cache_dir")]
    pub cache_dir: String,

    /// Maximum number of cached prompt entries to retain on disk
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,

    /// Maximum age (in days) before cached entries are purged
    #[serde(default = "default_max_age_days")]
    pub max_age_days: u64,

    /// Automatically evict stale entries on startup/shutdown
    #[serde(default = "default_auto_cleanup")]
    pub enable_auto_cleanup: bool,

    /// Minimum quality score required before persisting an entry
    // NOTE(review): presumably a 0.0..=1.0 score — confirm against the code
    // that compares entries to this threshold before persisting.
    #[serde(default = "default_min_quality_threshold")]
    pub min_quality_threshold: f64,

    /// Provider specific overrides
    #[serde(default)]
    pub providers: ProviderPromptCachingConfig,
}
40
41impl Default for PromptCachingConfig {
42    fn default() -> Self {
43        Self {
44            enabled: default_enabled(),
45            cache_dir: default_cache_dir(),
46            max_entries: default_max_entries(),
47            max_age_days: default_max_age_days(),
48            enable_auto_cleanup: default_auto_cleanup(),
49            min_quality_threshold: default_min_quality_threshold(),
50            providers: ProviderPromptCachingConfig::default(),
51        }
52    }
53}
54
55impl PromptCachingConfig {
56    /// Resolve the configured cache directory to an absolute path
57    ///
58    /// - `~` is expanded to the user's home directory when available
59    /// - Relative paths are resolved against the provided workspace root when supplied
60    /// - Falls back to the configured string when neither applies
61    pub fn resolve_cache_dir(&self, workspace_root: Option<&Path>) -> PathBuf {
62        resolve_path(&self.cache_dir, workspace_root)
63    }
64}
65
66/// Per-provider configuration overrides
67#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
68#[derive(Debug, Clone, Deserialize, Serialize, Default)]
69pub struct ProviderPromptCachingConfig {
70    #[serde(default = "OpenAIPromptCacheSettings::default")]
71    pub openai: OpenAIPromptCacheSettings,
72
73    #[serde(default = "AnthropicPromptCacheSettings::default")]
74    pub anthropic: AnthropicPromptCacheSettings,
75
76    #[serde(default = "GeminiPromptCacheSettings::default")]
77    pub gemini: GeminiPromptCacheSettings,
78
79    #[serde(default = "OpenRouterPromptCacheSettings::default")]
80    pub openrouter: OpenRouterPromptCacheSettings,
81
82    #[serde(default = "MoonshotPromptCacheSettings::default")]
83    pub moonshot: MoonshotPromptCacheSettings,
84
85    #[serde(default = "XAIPromptCacheSettings::default")]
86    pub xai: XAIPromptCacheSettings,
87
88    #[serde(default = "DeepSeekPromptCacheSettings::default")]
89    pub deepseek: DeepSeekPromptCacheSettings,
90
91    #[serde(default = "ZaiPromptCacheSettings::default")]
92    pub zai: ZaiPromptCacheSettings,
93}
94
/// OpenAI prompt caching controls (automatic with metrics)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OpenAIPromptCacheSettings {
    /// Enable OpenAI prompt caching integration
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Minimum shared-prefix size (in tokens) before caching applies
    // NOTE(review): semantics inferred from the name — confirm against the
    // request builder that reads this value.
    #[serde(default = "default_openai_min_prefix_tokens")]
    pub min_prefix_tokens: u32,

    /// Seconds of inactivity after which a cached prefix is treated as expired
    #[serde(default = "default_openai_idle_expiration")]
    pub idle_expiration_seconds: u64,

    /// Surface cache hit/miss metrics reported by the API
    #[serde(default = "default_true")]
    pub surface_metrics: bool,

    /// Strategy for generating OpenAI `prompt_cache_key`.
    /// Session mode derives one stable key per VT Code conversation.
    #[serde(default = "default_openai_prompt_cache_key_mode")]
    pub prompt_cache_key_mode: OpenAIPromptCacheKeyMode,

    /// Optional prompt cache retention string to pass directly into OpenAI Responses API
    /// Example: "24h" or "1d". If set, VT Code will include `prompt_cache_retention`
    /// in the request body to extend the model-side prompt caching window.
    /// Validated by [`OpenAIPromptCacheSettings::validate`].
    #[serde(default)]
    pub prompt_cache_retention: Option<String>,
}
122
123impl Default for OpenAIPromptCacheSettings {
124    fn default() -> Self {
125        Self {
126            enabled: default_true(),
127            min_prefix_tokens: default_openai_min_prefix_tokens(),
128            idle_expiration_seconds: default_openai_idle_expiration(),
129            surface_metrics: default_true(),
130            prompt_cache_key_mode: default_openai_prompt_cache_key_mode(),
131            prompt_cache_retention: None,
132        }
133    }
134}
135
136impl OpenAIPromptCacheSettings {
137    /// Validate OpenAI provider prompt cache settings. Returns Err if the retention value is invalid.
138    pub fn validate(&self) -> anyhow::Result<()> {
139        if let Some(ref retention) = self.prompt_cache_retention {
140            parse_retention_duration(retention)
141                .with_context(|| format!("Invalid prompt_cache_retention: {}", retention))?;
142        }
143        Ok(())
144    }
145}
146
/// OpenAI prompt cache key derivation mode.
///
/// Serialized in snake_case, so vtcode.toml accepts `"off"` / `"session"`.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum OpenAIPromptCacheKeyMode {
    /// Do not send `prompt_cache_key` in OpenAI requests.
    Off,
    /// Send one stable `prompt_cache_key` per VT Code session.
    #[default]
    Session,
}
158
/// Anthropic Claude cache control settings
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AnthropicPromptCacheSettings {
    /// Enable Anthropic cache-control injection
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Default TTL in seconds for the first cache breakpoint (tools/system).
    /// Anthropic only supports "5m" (300s) or "1h" (3600s) TTL formats.
    /// Set to >= 3600 for 1-hour cache on tools and system prompts.
    /// Default: 3600 (1 hour) - recommended for stable tool definitions
    #[serde(default = "default_anthropic_tools_ttl")]
    pub tools_ttl_seconds: u64,

    /// TTL for subsequent cache breakpoints (messages).
    /// Set to >= 3600 for 1-hour cache on messages.
    /// Default: 300 (5 minutes) - recommended for frequently changing messages
    #[serde(default = "default_anthropic_messages_ttl")]
    pub messages_ttl_seconds: u64,

    /// Maximum number of cache breakpoints to use (max 4 per Anthropic spec).
    /// Default: 4
    // NOTE(review): this file does not enforce the <= 4 bound; presumably the
    // request builder clamps it — confirm.
    #[serde(default = "default_anthropic_max_breakpoints")]
    pub max_breakpoints: u8,

    /// Apply cache control to system prompts by default
    #[serde(default = "default_true")]
    pub cache_system_messages: bool,

    /// Apply cache control to user messages exceeding threshold
    #[serde(default = "default_true")]
    pub cache_user_messages: bool,

    /// Apply cache control to tool definitions by default
    /// Default: true (tools are typically stable and benefit from longer caching)
    #[serde(default = "default_true")]
    pub cache_tool_definitions: bool,

    /// Minimum message length (in characters) before applying cache control
    /// to avoid caching very short messages that don't benefit from caching.
    /// Default: 256 characters (~64 tokens)
    #[serde(default = "default_min_message_length")]
    pub min_message_length_for_cache: usize,

    /// Extended TTL for Anthropic prompt caching (in seconds)
    /// Set to >= 3600 for 1-hour cache on messages
    /// `None` disables the extended window; defaults to `Some(...)` from constants.
    #[serde(default = "default_anthropic_extended_ttl")]
    pub extended_ttl_seconds: Option<u64>,
}
208
209impl Default for AnthropicPromptCacheSettings {
210    fn default() -> Self {
211        Self {
212            enabled: default_true(),
213            tools_ttl_seconds: default_anthropic_tools_ttl(),
214            messages_ttl_seconds: default_anthropic_messages_ttl(),
215            max_breakpoints: default_anthropic_max_breakpoints(),
216            cache_system_messages: default_true(),
217            cache_user_messages: default_true(),
218            cache_tool_definitions: default_true(),
219            min_message_length_for_cache: default_min_message_length(),
220            extended_ttl_seconds: default_anthropic_extended_ttl(),
221        }
222    }
223}
224
/// Gemini API caching preferences
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GeminiPromptCacheSettings {
    /// Enable Gemini prompt caching integration
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Implicit (automatic), explicit (cached-content), or off
    #[serde(default = "default_gemini_mode")]
    pub mode: GeminiPromptCacheMode,

    /// Minimum shared-prefix size (in tokens) before caching applies
    #[serde(default = "default_gemini_min_prefix_tokens")]
    pub min_prefix_tokens: u32,

    /// TTL for explicit caches (ignored in implicit mode)
    #[serde(default = "default_gemini_explicit_ttl")]
    pub explicit_ttl_seconds: Option<u64>,
}
242
243impl Default for GeminiPromptCacheSettings {
244    fn default() -> Self {
245        Self {
246            enabled: default_true(),
247            mode: GeminiPromptCacheMode::default(),
248            min_prefix_tokens: default_gemini_min_prefix_tokens(),
249            explicit_ttl_seconds: default_gemini_explicit_ttl(),
250        }
251    }
252}
253
254/// Gemini prompt caching mode selection
255#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
256#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
257#[serde(rename_all = "snake_case")]
258#[derive(Default)]
259pub enum GeminiPromptCacheMode {
260    #[default]
261    Implicit,
262    Explicit,
263    Off,
264}
265
/// OpenRouter passthrough caching controls
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OpenRouterPromptCacheSettings {
    /// Enable OpenRouter prompt caching passthrough
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Propagate provider cache instructions automatically
    #[serde(default = "default_true")]
    pub propagate_provider_capabilities: bool,

    /// Surface cache savings reported by OpenRouter
    #[serde(default = "default_true")]
    pub report_savings: bool,
}
281
282impl Default for OpenRouterPromptCacheSettings {
283    fn default() -> Self {
284        Self {
285            enabled: default_true(),
286            propagate_provider_capabilities: default_true(),
287            report_savings: default_true(),
288        }
289    }
290}
291
/// Moonshot prompt caching configuration (leverages server-side reuse)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MoonshotPromptCacheSettings {
    /// Enable Moonshot caching; default comes from `constants::prompt_cache`
    #[serde(default = "default_moonshot_enabled")]
    pub enabled: bool,
}
299
300impl Default for MoonshotPromptCacheSettings {
301    fn default() -> Self {
302        Self {
303            enabled: default_moonshot_enabled(),
304        }
305    }
306}
307
/// xAI prompt caching configuration (automatic platform-level cache)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct XAIPromptCacheSettings {
    /// Enable xAI caching (on by default; the platform manages the cache)
    #[serde(default = "default_true")]
    pub enabled: bool,
}
315
316impl Default for XAIPromptCacheSettings {
317    fn default() -> Self {
318        Self {
319            enabled: default_true(),
320        }
321    }
322}
323
/// DeepSeek prompt caching configuration (automatic KV cache reuse)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DeepSeekPromptCacheSettings {
    /// Enable DeepSeek caching integration
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Emit cache hit/miss metrics from responses when available
    #[serde(default = "default_true")]
    pub surface_metrics: bool,
}
335
336impl Default for DeepSeekPromptCacheSettings {
337    fn default() -> Self {
338        Self {
339            enabled: default_true(),
340            surface_metrics: default_true(),
341        }
342    }
343}
344
/// Z.AI prompt caching configuration (disabled until platform exposes metrics)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ZaiPromptCacheSettings {
    /// Enable Z.AI caching; default comes from `constants::prompt_cache`
    #[serde(default = "default_zai_enabled")]
    pub enabled: bool,
}
352
353impl Default for ZaiPromptCacheSettings {
354    fn default() -> Self {
355        Self {
356            enabled: default_zai_enabled(),
357        }
358    }
359}
360
// ---------------------------------------------------------------------------
// Serde default helpers.
// Each mirrors a constant in `constants::prompt_cache` so serde-level field
// defaults and the hand-written `Default` impls above stay in sync.
// ---------------------------------------------------------------------------

fn default_enabled() -> bool {
    prompt_cache::DEFAULT_ENABLED
}

// Default cache directory rooted at the user's home (`~` expanded later by
// `resolve_path`).
fn default_cache_dir() -> String {
    format!("~/{path}", path = prompt_cache::DEFAULT_CACHE_DIR)
}

fn default_max_entries() -> usize {
    prompt_cache::DEFAULT_MAX_ENTRIES
}

fn default_max_age_days() -> u64 {
    prompt_cache::DEFAULT_MAX_AGE_DAYS
}

fn default_auto_cleanup() -> bool {
    prompt_cache::DEFAULT_AUTO_CLEANUP
}

fn default_min_quality_threshold() -> f64 {
    prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD
}

// Shared default for the many boolean `enabled`/metrics flags.
fn default_true() -> bool {
    true
}

fn default_openai_min_prefix_tokens() -> u32 {
    prompt_cache::OPENAI_MIN_PREFIX_TOKENS
}

fn default_openai_idle_expiration() -> u64 {
    prompt_cache::OPENAI_IDLE_EXPIRATION_SECONDS
}

fn default_openai_prompt_cache_key_mode() -> OpenAIPromptCacheKeyMode {
    OpenAIPromptCacheKeyMode::Session
}

// Not referenced by any serde default in this file; kept for future use.
#[allow(dead_code)]
fn default_anthropic_default_ttl() -> u64 {
    prompt_cache::ANTHROPIC_DEFAULT_TTL_SECONDS
}

// NOTE(review): this IS referenced (serde default on `extended_ttl_seconds`),
// so the `allow(dead_code)` looks redundant — confirm before removing.
#[allow(dead_code)]
fn default_anthropic_extended_ttl() -> Option<u64> {
    Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
}

fn default_anthropic_tools_ttl() -> u64 {
    prompt_cache::ANTHROPIC_TOOLS_TTL_SECONDS
}

fn default_anthropic_messages_ttl() -> u64 {
    prompt_cache::ANTHROPIC_MESSAGES_TTL_SECONDS
}

fn default_anthropic_max_breakpoints() -> u8 {
    prompt_cache::ANTHROPIC_MAX_BREAKPOINTS
}

// NOTE(review): referenced by the serde default on
// `min_message_length_for_cache`; the allow may be stale — confirm.
#[allow(dead_code)]
fn default_min_message_length() -> usize {
    prompt_cache::ANTHROPIC_MIN_MESSAGE_LENGTH_FOR_CACHE
}

fn default_gemini_min_prefix_tokens() -> u32 {
    prompt_cache::GEMINI_MIN_PREFIX_TOKENS
}

fn default_gemini_explicit_ttl() -> Option<u64> {
    Some(prompt_cache::GEMINI_EXPLICIT_DEFAULT_TTL_SECONDS)
}

fn default_gemini_mode() -> GeminiPromptCacheMode {
    GeminiPromptCacheMode::Implicit
}

fn default_zai_enabled() -> bool {
    prompt_cache::ZAI_CACHE_ENABLED
}

fn default_moonshot_enabled() -> bool {
    prompt_cache::MOONSHOT_CACHE_ENABLED
}
447
448fn resolve_path(input: &str, workspace_root: Option<&Path>) -> PathBuf {
449    let trimmed = input.trim();
450    if trimmed.is_empty() {
451        return resolve_default_cache_dir();
452    }
453
454    if let Some(stripped) = trimmed
455        .strip_prefix("~/")
456        .or_else(|| trimmed.strip_prefix("~\\"))
457    {
458        if let Some(home) = dirs::home_dir() {
459            return home.join(stripped);
460        }
461        return PathBuf::from(stripped);
462    }
463
464    let candidate = Path::new(trimmed);
465    if candidate.is_absolute() {
466        return candidate.to_path_buf();
467    }
468
469    if let Some(root) = workspace_root {
470        return root.join(candidate);
471    }
472
473    candidate.to_path_buf()
474}
475
476fn resolve_default_cache_dir() -> PathBuf {
477    if let Some(home) = dirs::home_dir() {
478        return home.join(prompt_cache::DEFAULT_CACHE_DIR);
479    }
480    PathBuf::from(prompt_cache::DEFAULT_CACHE_DIR)
481}
482
483/// Parse a duration string into a std::time::Duration
484/// Acceptable formats: <number>[s|m|h|d], e.g., "30s", "5m", "24h", "1d".
485fn parse_retention_duration(input: &str) -> anyhow::Result<Duration> {
486    let input = input.trim();
487    if input.is_empty() {
488        anyhow::bail!("Empty retention string");
489    }
490
491    // Strict format: number + unit (s|m|h|d)
492    let re = Regex::new(r"^(\d+)([smhdSMHD])$").unwrap();
493    let caps = re
494        .captures(input)
495        .ok_or_else(|| anyhow::anyhow!("Invalid retention format; use <number>[s|m|h|d]"))?;
496
497    let value_str = caps.get(1).unwrap().as_str();
498    let unit = caps
499        .get(2)
500        .unwrap()
501        .as_str()
502        .chars()
503        .next()
504        .unwrap()
505        .to_ascii_lowercase();
506    let value: u64 = value_str
507        .parse()
508        .with_context(|| format!("Invalid numeric value in retention: {}", value_str))?;
509
510    let seconds = match unit {
511        's' => value,
512        'm' => value * 60,
513        'h' => value * 60 * 60,
514        'd' => value * 24 * 60 * 60,
515        _ => anyhow::bail!("Invalid retention unit; expected s,m,h,d"),
516    };
517
518    // Enforce a reasonable retention window: at least 1s and max 30 days
519    const MIN_SECONDS: u64 = 1;
520    const MAX_SECONDS: u64 = 30 * 24 * 60 * 60; // 30 days
521    if !((MIN_SECONDS..=MAX_SECONDS).contains(&seconds)) {
522        anyhow::bail!("prompt_cache_retention must be between 1s and 30d");
523    }
524
525    Ok(Duration::from_secs(seconds))
526}
527
impl PromptCachingConfig {
    /// Validate prompt cache config and provider overrides
    ///
    /// Currently only the OpenAI settings carry a value that can be malformed
    /// (`prompt_cache_retention`); other providers need no validation yet.
    pub fn validate(&self) -> anyhow::Result<()> {
        // Validate OpenAI provider settings
        self.providers.openai.validate()?;
        Ok(())
    }
}
536
#[cfg(test)]
mod tests {
    use super::*;
    use assert_fs::TempDir;

    // Defaults from code paths and from constants must never drift apart.
    #[test]
    fn prompt_caching_defaults_align_with_constants() {
        let cfg = PromptCachingConfig::default();
        assert!(cfg.enabled);
        assert_eq!(cfg.max_entries, prompt_cache::DEFAULT_MAX_ENTRIES);
        assert_eq!(cfg.max_age_days, prompt_cache::DEFAULT_MAX_AGE_DAYS);
        // Float compared with an epsilon tolerance rather than `==`.
        assert!(
            (cfg.min_quality_threshold - prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD).abs()
                < f64::EPSILON
        );
        assert!(cfg.providers.openai.enabled);
        assert_eq!(
            cfg.providers.openai.min_prefix_tokens,
            prompt_cache::OPENAI_MIN_PREFIX_TOKENS
        );
        assert_eq!(
            cfg.providers.openai.prompt_cache_key_mode,
            OpenAIPromptCacheKeyMode::Session
        );
        assert_eq!(
            cfg.providers.anthropic.extended_ttl_seconds,
            Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
        );
        assert_eq!(cfg.providers.gemini.mode, GeminiPromptCacheMode::Implicit);
        assert!(cfg.providers.moonshot.enabled);
        assert_eq!(cfg.providers.openai.prompt_cache_retention, None);
    }

    // `~/...` expands to the home directory when one exists; otherwise the
    // prefix is dropped and the remainder used as-is.
    #[test]
    fn resolve_cache_dir_expands_home() {
        let cfg = PromptCachingConfig {
            cache_dir: "~/.custom/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(None);
        if let Some(home) = dirs::home_dir() {
            assert!(resolved.starts_with(home));
        } else {
            assert_eq!(resolved, PathBuf::from(".custom/cache"));
        }
    }

    // Relative paths are anchored at the supplied workspace root.
    #[test]
    fn resolve_cache_dir_uses_workspace_when_relative() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();
        let cfg = PromptCachingConfig {
            cache_dir: "relative/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(Some(workspace));
        assert_eq!(resolved, workspace.join("relative/cache"));
    }

    // Happy-path unit conversions plus the 1s..=30d bounds and malformed input.
    #[test]
    fn parse_retention_duration_valid_and_invalid() {
        assert_eq!(
            parse_retention_duration("24h").unwrap(),
            std::time::Duration::from_secs(86400)
        );
        assert_eq!(
            parse_retention_duration("5m").unwrap(),
            std::time::Duration::from_secs(300)
        );
        assert_eq!(
            parse_retention_duration("1s").unwrap(),
            std::time::Duration::from_secs(1)
        );
        assert!(parse_retention_duration("0s").is_err());
        assert!(parse_retention_duration("31d").is_err());
        assert!(parse_retention_duration("abc").is_err());
        assert!(parse_retention_duration("").is_err());
        assert!(parse_retention_duration("10x").is_err());
    }

    #[test]
    fn validate_prompt_cache_rejects_invalid_retention() {
        let mut cfg = PromptCachingConfig::default();
        cfg.providers.openai.prompt_cache_retention = Some("invalid".to_string());
        assert!(cfg.validate().is_err());
    }

    // Round-trip the snake_case enum representation through a TOML fragment.
    #[test]
    fn prompt_cache_key_mode_parses_from_toml() {
        let parsed: PromptCachingConfig = toml::from_str(
            r#"
[providers.openai]
prompt_cache_key_mode = "off"
"#,
        )
        .expect("prompt cache config should parse");

        assert_eq!(
            parsed.providers.openai.prompt_cache_key_mode,
            OpenAIPromptCacheKeyMode::Off
        );
    }
}