//! Prompt caching configuration (`vtcode_config/core/prompt_cache.rs`).
1use crate::constants::prompt_cache;
2use anyhow::Context;
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7
/// Global prompt caching configuration loaded from vtcode.toml
///
/// Every field carries a serde default, so a partially specified
/// `[prompt_cache]` table deserializes with the remaining fields filled from
/// the `default_*` helpers below (kept in sync with `constants::prompt_cache`).
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct PromptCachingConfig {
    /// Enable prompt caching features globally
    #[serde(default = "default_enabled")]
    pub enabled: bool,

    /// Base directory for local prompt cache storage (supports `~` expansion)
    #[serde(default = "default_cache_dir")]
    pub cache_dir: String,

    /// Maximum number of cached prompt entries to retain on disk
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,

    /// Maximum age (in days) before cached entries are purged
    #[serde(default = "default_max_age_days")]
    pub max_age_days: u64,

    /// Automatically evict stale entries on startup/shutdown
    #[serde(default = "default_auto_cleanup")]
    pub enable_auto_cleanup: bool,

    /// Minimum quality score required before persisting an entry
    #[serde(default = "default_min_quality_threshold")]
    pub min_quality_threshold: f64,

    /// Provider specific overrides
    #[serde(default)]
    pub providers: ProviderPromptCachingConfig,
}
40
41impl Default for PromptCachingConfig {
42    fn default() -> Self {
43        Self {
44            enabled: default_enabled(),
45            cache_dir: default_cache_dir(),
46            max_entries: default_max_entries(),
47            max_age_days: default_max_age_days(),
48            enable_auto_cleanup: default_auto_cleanup(),
49            min_quality_threshold: default_min_quality_threshold(),
50            providers: ProviderPromptCachingConfig::default(),
51        }
52    }
53}
54
55impl PromptCachingConfig {
56    /// Resolve the configured cache directory to an absolute path
57    ///
58    /// - `~` is expanded to the user's home directory when available
59    /// - Relative paths are resolved against the provided workspace root when supplied
60    /// - Falls back to the configured string when neither applies
61    pub fn resolve_cache_dir(&self, workspace_root: Option<&Path>) -> PathBuf {
62        resolve_path(&self.cache_dir, workspace_root)
63    }
64}
65
66/// Per-provider configuration overrides
67#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
68#[derive(Debug, Clone, Deserialize, Serialize, Default)]
69pub struct ProviderPromptCachingConfig {
70    #[serde(default = "OpenAIPromptCacheSettings::default")]
71    pub openai: OpenAIPromptCacheSettings,
72
73    #[serde(default = "AnthropicPromptCacheSettings::default")]
74    pub anthropic: AnthropicPromptCacheSettings,
75
76    #[serde(default = "GeminiPromptCacheSettings::default")]
77    pub gemini: GeminiPromptCacheSettings,
78
79    #[serde(default = "OpenRouterPromptCacheSettings::default")]
80    pub openrouter: OpenRouterPromptCacheSettings,
81
82    #[serde(default = "MoonshotPromptCacheSettings::default")]
83    pub moonshot: MoonshotPromptCacheSettings,
84
85    #[serde(default = "DeepSeekPromptCacheSettings::default")]
86    pub deepseek: DeepSeekPromptCacheSettings,
87
88    #[serde(default = "ZaiPromptCacheSettings::default")]
89    pub zai: ZaiPromptCacheSettings,
90}
91
/// OpenAI prompt caching controls (automatic with metrics)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OpenAIPromptCacheSettings {
    /// Enable the OpenAI prompt caching integration
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Minimum shared-prefix size (in tokens) before caching is considered
    #[serde(default = "default_openai_min_prefix_tokens")]
    pub min_prefix_tokens: u32,

    /// Idle window (seconds) after which a cached prefix is treated as expired
    #[serde(default = "default_openai_idle_expiration")]
    pub idle_expiration_seconds: u64,

    /// Surface cache hit/miss metrics reported by the API
    #[serde(default = "default_true")]
    pub surface_metrics: bool,

    /// Strategy for generating OpenAI `prompt_cache_key`.
    /// Session mode derives one stable key per VT Code conversation.
    #[serde(default = "default_openai_prompt_cache_key_mode")]
    pub prompt_cache_key_mode: OpenAIPromptCacheKeyMode,

    /// Optional prompt cache retention string to pass directly into OpenAI Responses API
    /// Example: "24h" or "1d". If set, VT Code will include `prompt_cache_retention`
    /// in the request body to extend the model-side prompt caching window.
    /// Validated by `parse_retention_duration` (format `<number>[s|m|h|d]`).
    #[serde(default)]
    pub prompt_cache_retention: Option<String>,
}
119
120impl Default for OpenAIPromptCacheSettings {
121    fn default() -> Self {
122        Self {
123            enabled: default_true(),
124            min_prefix_tokens: default_openai_min_prefix_tokens(),
125            idle_expiration_seconds: default_openai_idle_expiration(),
126            surface_metrics: default_true(),
127            prompt_cache_key_mode: default_openai_prompt_cache_key_mode(),
128            prompt_cache_retention: None,
129        }
130    }
131}
132
133impl OpenAIPromptCacheSettings {
134    /// Validate OpenAI provider prompt cache settings. Returns Err if the retention value is invalid.
135    pub fn validate(&self) -> anyhow::Result<()> {
136        if let Some(ref retention) = self.prompt_cache_retention {
137            parse_retention_duration(retention)
138                .with_context(|| format!("Invalid prompt_cache_retention: {}", retention))?;
139        }
140        Ok(())
141    }
142}
143
/// OpenAI prompt cache key derivation mode.
///
/// Serialized in snake_case, so vtcode.toml accepts `"off"` / `"session"`.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
pub enum OpenAIPromptCacheKeyMode {
    /// Do not send `prompt_cache_key` in OpenAI requests.
    Off,
    /// Send one stable `prompt_cache_key` per VT Code session.
    #[default]
    Session,
}
155
/// Anthropic Claude cache control settings
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AnthropicPromptCacheSettings {
    /// Enable Anthropic cache-control injection
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Default TTL in seconds for the first cache breakpoint (tools/system).
    /// Anthropic only supports "5m" (300s) or "1h" (3600s) TTL formats.
    /// Set to >= 3600 for 1-hour cache on tools and system prompts.
    /// Default: 3600 (1 hour) - recommended for stable tool definitions
    #[serde(default = "default_anthropic_tools_ttl")]
    pub tools_ttl_seconds: u64,

    /// TTL for subsequent cache breakpoints (messages).
    /// Set to >= 3600 for 1-hour cache on messages.
    /// Default: 300 (5 minutes) - recommended for frequently changing messages
    #[serde(default = "default_anthropic_messages_ttl")]
    pub messages_ttl_seconds: u64,

    /// Maximum number of cache breakpoints to use (max 4 per Anthropic spec).
    /// Default: 4
    #[serde(default = "default_anthropic_max_breakpoints")]
    pub max_breakpoints: u8,

    /// Apply cache control to system prompts by default
    #[serde(default = "default_true")]
    pub cache_system_messages: bool,

    /// Apply cache control to user messages exceeding threshold
    #[serde(default = "default_true")]
    pub cache_user_messages: bool,

    /// Apply cache control to tool definitions by default
    /// Default: true (tools are typically stable and benefit from longer caching)
    #[serde(default = "default_true")]
    pub cache_tool_definitions: bool,

    /// Minimum message length (in characters) before applying cache control
    /// to avoid caching very short messages that don't benefit from caching.
    /// Default: 256 characters (~64 tokens)
    #[serde(default = "default_min_message_length")]
    pub min_message_length_for_cache: usize,

    /// Extended TTL for Anthropic prompt caching (in seconds)
    /// Set to >= 3600 for 1-hour cache on messages
    /// `None` disables the extended window; defaults to Some(constant).
    #[serde(default = "default_anthropic_extended_ttl")]
    pub extended_ttl_seconds: Option<u64>,
}
205
206impl Default for AnthropicPromptCacheSettings {
207    fn default() -> Self {
208        Self {
209            enabled: default_true(),
210            tools_ttl_seconds: default_anthropic_tools_ttl(),
211            messages_ttl_seconds: default_anthropic_messages_ttl(),
212            max_breakpoints: default_anthropic_max_breakpoints(),
213            cache_system_messages: default_true(),
214            cache_user_messages: default_true(),
215            cache_tool_definitions: default_true(),
216            min_message_length_for_cache: default_min_message_length(),
217            extended_ttl_seconds: default_anthropic_extended_ttl(),
218        }
219    }
220}
221
/// Gemini API caching preferences
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GeminiPromptCacheSettings {
    /// Enable Gemini prompt caching
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Caching strategy: implicit (provider-managed), explicit, or off
    #[serde(default = "default_gemini_mode")]
    pub mode: GeminiPromptCacheMode,

    /// Minimum shared-prefix size (in tokens) before caching is considered
    #[serde(default = "default_gemini_min_prefix_tokens")]
    pub min_prefix_tokens: u32,

    /// TTL for explicit caches (ignored in implicit mode)
    #[serde(default = "default_gemini_explicit_ttl")]
    pub explicit_ttl_seconds: Option<u64>,
}
239
240impl Default for GeminiPromptCacheSettings {
241    fn default() -> Self {
242        Self {
243            enabled: default_true(),
244            mode: GeminiPromptCacheMode::default(),
245            min_prefix_tokens: default_gemini_min_prefix_tokens(),
246            explicit_ttl_seconds: default_gemini_explicit_ttl(),
247        }
248    }
249}
250
251/// Gemini prompt caching mode selection
252#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
253#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)]
254#[serde(rename_all = "snake_case")]
255#[derive(Default)]
256pub enum GeminiPromptCacheMode {
257    #[default]
258    Implicit,
259    Explicit,
260    Off,
261}
262
/// OpenRouter passthrough caching controls
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct OpenRouterPromptCacheSettings {
    /// Enable OpenRouter prompt cache passthrough
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Propagate provider cache instructions automatically
    #[serde(default = "default_true")]
    pub propagate_provider_capabilities: bool,

    /// Surface cache savings reported by OpenRouter
    #[serde(default = "default_true")]
    pub report_savings: bool,
}
278
279impl Default for OpenRouterPromptCacheSettings {
280    fn default() -> Self {
281        Self {
282            enabled: default_true(),
283            propagate_provider_capabilities: default_true(),
284            report_savings: default_true(),
285        }
286    }
287}
288
/// Moonshot prompt caching configuration (leverages server-side reuse)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MoonshotPromptCacheSettings {
    /// Enable Moonshot prompt caching (default from constants)
    #[serde(default = "default_moonshot_enabled")]
    pub enabled: bool,
}
296
297impl Default for MoonshotPromptCacheSettings {
298    fn default() -> Self {
299        Self {
300            enabled: default_moonshot_enabled(),
301        }
302    }
303}
304
/// DeepSeek prompt caching configuration (automatic KV cache reuse)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DeepSeekPromptCacheSettings {
    /// Enable DeepSeek prompt caching
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Emit cache hit/miss metrics from responses when available
    #[serde(default = "default_true")]
    pub surface_metrics: bool,
}
316
317impl Default for DeepSeekPromptCacheSettings {
318    fn default() -> Self {
319        Self {
320            enabled: default_true(),
321            surface_metrics: default_true(),
322        }
323    }
324}
325
/// Z.AI prompt caching configuration (disabled until platform exposes metrics)
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ZaiPromptCacheSettings {
    /// Enable Z.AI prompt caching (default from constants)
    #[serde(default = "default_zai_enabled")]
    pub enabled: bool,
}
333
334impl Default for ZaiPromptCacheSettings {
335    fn default() -> Self {
336        Self {
337            enabled: default_zai_enabled(),
338        }
339    }
340}
341
// ---------------------------------------------------------------------------
// Serde default helpers. Each mirrors a value in `constants::prompt_cache`
// so TOML defaults and the `Default` impls above stay in sync.
// ---------------------------------------------------------------------------

fn default_enabled() -> bool {
    prompt_cache::DEFAULT_ENABLED
}

// Default cache dir lives under the user's home directory; the `~` prefix is
// expanded later by `resolve_path`.
fn default_cache_dir() -> String {
    format!("~/{path}", path = prompt_cache::DEFAULT_CACHE_DIR)
}

fn default_max_entries() -> usize {
    prompt_cache::DEFAULT_MAX_ENTRIES
}

fn default_max_age_days() -> u64 {
    prompt_cache::DEFAULT_MAX_AGE_DAYS
}

fn default_auto_cleanup() -> bool {
    prompt_cache::DEFAULT_AUTO_CLEANUP
}

fn default_min_quality_threshold() -> f64 {
    prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD
}

// Shared helper for the many boolean fields that default to `true`.
fn default_true() -> bool {
    true
}

fn default_openai_min_prefix_tokens() -> u32 {
    prompt_cache::OPENAI_MIN_PREFIX_TOKENS
}

fn default_openai_idle_expiration() -> u64 {
    prompt_cache::OPENAI_IDLE_EXPIRATION_SECONDS
}

fn default_openai_prompt_cache_key_mode() -> OpenAIPromptCacheKeyMode {
    OpenAIPromptCacheKeyMode::Session
}
381
382#[allow(dead_code)]
383fn default_anthropic_default_ttl() -> u64 {
384    prompt_cache::ANTHROPIC_DEFAULT_TTL_SECONDS
385}
386
387#[allow(dead_code)]
388fn default_anthropic_extended_ttl() -> Option<u64> {
389    Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
390}
391
fn default_anthropic_tools_ttl() -> u64 {
    prompt_cache::ANTHROPIC_TOOLS_TTL_SECONDS
}

fn default_anthropic_messages_ttl() -> u64 {
    prompt_cache::ANTHROPIC_MESSAGES_TTL_SECONDS
}

// Anthropic allows at most 4 cache breakpoints per request (see the field
// doc on `max_breakpoints`); the constant is expected to respect that cap.
fn default_anthropic_max_breakpoints() -> u8 {
    prompt_cache::ANTHROPIC_MAX_BREAKPOINTS
}
403
404#[allow(dead_code)]
405fn default_min_message_length() -> usize {
406    prompt_cache::ANTHROPIC_MIN_MESSAGE_LENGTH_FOR_CACHE
407}
408
fn default_gemini_min_prefix_tokens() -> u32 {
    prompt_cache::GEMINI_MIN_PREFIX_TOKENS
}

// Only used by explicit mode; implicit mode ignores the TTL.
fn default_gemini_explicit_ttl() -> Option<u64> {
    Some(prompt_cache::GEMINI_EXPLICIT_DEFAULT_TTL_SECONDS)
}

fn default_gemini_mode() -> GeminiPromptCacheMode {
    GeminiPromptCacheMode::Implicit
}

fn default_zai_enabled() -> bool {
    prompt_cache::ZAI_CACHE_ENABLED
}

fn default_moonshot_enabled() -> bool {
    prompt_cache::MOONSHOT_CACHE_ENABLED
}
428
429fn resolve_path(input: &str, workspace_root: Option<&Path>) -> PathBuf {
430    let trimmed = input.trim();
431    if trimmed.is_empty() {
432        return resolve_default_cache_dir();
433    }
434
435    if let Some(stripped) = trimmed
436        .strip_prefix("~/")
437        .or_else(|| trimmed.strip_prefix("~\\"))
438    {
439        if let Some(home) = dirs::home_dir() {
440            return home.join(stripped);
441        }
442        return PathBuf::from(stripped);
443    }
444
445    let candidate = Path::new(trimmed);
446    if candidate.is_absolute() {
447        return candidate.to_path_buf();
448    }
449
450    if let Some(root) = workspace_root {
451        return root.join(candidate);
452    }
453
454    candidate.to_path_buf()
455}
456
457fn resolve_default_cache_dir() -> PathBuf {
458    if let Some(home) = dirs::home_dir() {
459        return home.join(prompt_cache::DEFAULT_CACHE_DIR);
460    }
461    PathBuf::from(prompt_cache::DEFAULT_CACHE_DIR)
462}
463
464/// Parse a duration string into a std::time::Duration
465/// Acceptable formats: <number>[s|m|h|d], e.g., "30s", "5m", "24h", "1d".
466fn parse_retention_duration(input: &str) -> anyhow::Result<Duration> {
467    let input = input.trim();
468    if input.is_empty() {
469        anyhow::bail!("Empty retention string");
470    }
471
472    // Strict format: number + unit (s|m|h|d)
473    let re = Regex::new(r"^(\d+)([smhdSMHD])$").unwrap();
474    let caps = re
475        .captures(input)
476        .ok_or_else(|| anyhow::anyhow!("Invalid retention format; use <number>[s|m|h|d]"))?;
477
478    let value_str = caps.get(1).unwrap().as_str();
479    let unit = caps
480        .get(2)
481        .unwrap()
482        .as_str()
483        .chars()
484        .next()
485        .unwrap()
486        .to_ascii_lowercase();
487    let value: u64 = value_str
488        .parse()
489        .with_context(|| format!("Invalid numeric value in retention: {}", value_str))?;
490
491    let seconds = match unit {
492        's' => value,
493        'm' => value * 60,
494        'h' => value * 60 * 60,
495        'd' => value * 24 * 60 * 60,
496        _ => anyhow::bail!("Invalid retention unit; expected s,m,h,d"),
497    };
498
499    // Enforce a reasonable retention window: at least 1s and max 30 days
500    const MIN_SECONDS: u64 = 1;
501    const MAX_SECONDS: u64 = 30 * 24 * 60 * 60; // 30 days
502    if !((MIN_SECONDS..=MAX_SECONDS).contains(&seconds)) {
503        anyhow::bail!("prompt_cache_retention must be between 1s and 30d");
504    }
505
506    Ok(Duration::from_secs(seconds))
507}
508
509impl PromptCachingConfig {
510    /// Validate prompt cache config and provider overrides
511    pub fn validate(&self) -> anyhow::Result<()> {
512        // Validate OpenAI provider settings
513        self.providers.openai.validate()?;
514        Ok(())
515    }
516}
517
#[cfg(test)]
mod tests {
    use super::*;
    use assert_fs::TempDir;

    // Defaults must stay in lockstep with `constants::prompt_cache` so that
    // config, docs, and runtime behavior agree.
    #[test]
    fn prompt_caching_defaults_align_with_constants() {
        let cfg = PromptCachingConfig::default();
        assert!(cfg.enabled);
        assert_eq!(cfg.max_entries, prompt_cache::DEFAULT_MAX_ENTRIES);
        assert_eq!(cfg.max_age_days, prompt_cache::DEFAULT_MAX_AGE_DAYS);
        // Float comparison against the constant within machine epsilon.
        assert!(
            (cfg.min_quality_threshold - prompt_cache::DEFAULT_MIN_QUALITY_THRESHOLD).abs()
                < f64::EPSILON
        );
        assert!(cfg.providers.openai.enabled);
        assert_eq!(
            cfg.providers.openai.min_prefix_tokens,
            prompt_cache::OPENAI_MIN_PREFIX_TOKENS
        );
        assert_eq!(
            cfg.providers.openai.prompt_cache_key_mode,
            OpenAIPromptCacheKeyMode::Session
        );
        assert_eq!(
            cfg.providers.anthropic.extended_ttl_seconds,
            Some(prompt_cache::ANTHROPIC_EXTENDED_TTL_SECONDS)
        );
        assert_eq!(cfg.providers.gemini.mode, GeminiPromptCacheMode::Implicit);
        assert!(cfg.providers.moonshot.enabled);
        assert_eq!(cfg.providers.openai.prompt_cache_retention, None);
    }

    // `~/...` should expand under $HOME; when no home dir exists the path is
    // used relative, with the tilde stripped.
    #[test]
    fn resolve_cache_dir_expands_home() {
        let cfg = PromptCachingConfig {
            cache_dir: "~/.custom/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(None);
        if let Some(home) = dirs::home_dir() {
            assert!(resolved.starts_with(home));
        } else {
            assert_eq!(resolved, PathBuf::from(".custom/cache"));
        }
    }

    // Relative paths anchor to the workspace root when one is provided.
    #[test]
    fn resolve_cache_dir_uses_workspace_when_relative() {
        let temp = TempDir::new().unwrap();
        let workspace = temp.path();
        let cfg = PromptCachingConfig {
            cache_dir: "relative/cache".to_string(),
            ..PromptCachingConfig::default()
        };
        let resolved = cfg.resolve_cache_dir(Some(workspace));
        assert_eq!(resolved, workspace.join("relative/cache"));
    }

    // Covers unit conversion plus the 1s..=30d bounds and malformed inputs.
    #[test]
    fn parse_retention_duration_valid_and_invalid() {
        assert_eq!(
            parse_retention_duration("24h").unwrap(),
            Duration::from_secs(86400)
        );
        assert_eq!(
            parse_retention_duration("5m").unwrap(),
            Duration::from_secs(300)
        );
        assert_eq!(
            parse_retention_duration("1s").unwrap(),
            Duration::from_secs(1)
        );
        assert!(parse_retention_duration("0s").is_err());
        assert!(parse_retention_duration("31d").is_err());
        assert!(parse_retention_duration("abc").is_err());
        assert!(parse_retention_duration("").is_err());
        assert!(parse_retention_duration("10x").is_err());
    }

    // Top-level validate must surface provider-level retention errors.
    #[test]
    fn validate_prompt_cache_rejects_invalid_retention() {
        let mut cfg = PromptCachingConfig::default();
        cfg.providers.openai.prompt_cache_retention = Some("invalid".to_string());
        assert!(cfg.validate().is_err());
    }

    // Snake_case enum values must round-trip from TOML.
    #[test]
    fn prompt_cache_key_mode_parses_from_toml() {
        let parsed: PromptCachingConfig = toml::from_str(
            r#"
[providers.openai]
prompt_cache_key_mode = "off"
"#,
        )
        .expect("prompt cache config should parse");

        assert_eq!(
            parsed.providers.openai.prompt_cache_key_mode,
            OpenAIPromptCacheKeyMode::Off
        );
    }
}