caching_strategies/caching_strategies.rs

//! Caching strategies for `OpenAI` API responses.
#![allow(dead_code)]
#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_possible_wrap)]
#![allow(clippy::cast_lossless)]
#![allow(clippy::match_same_arms)]
#![allow(clippy::collapsible_if)]
#![allow(clippy::redundant_closure_for_method_calls)]
#![allow(clippy::too_many_lines)]
#![allow(clippy::missing_const_for_fn)]
#![allow(clippy::uninlined_format_args)]
#![allow(clippy::unused_async)]
#![allow(clippy::unreadable_literal)]
#![allow(clippy::map_unwrap_or)]
#![allow(clippy::struct_excessive_bools)]
#![allow(clippy::unused_self)]
#![allow(clippy::significant_drop_tightening)]
#![allow(clippy::inherent_to_string)]
#![allow(clippy::option_if_let_else)]
#![allow(clippy::cast_precision_loss)]
#![allow(clippy::unnecessary_wraps)]
#![allow(clippy::inefficient_to_string)]
//!
//! This example demonstrates comprehensive caching approaches including:
//! - In-memory caching with TTL (Time To Live)
//! - Persistent caching with file system storage
//! - A layered cache design that could be extended to a distributed store such as Redis
//! - Smart cache invalidation strategies
//! - Cache warming and precomputation
//! - Conditional caching based on request patterns
//! - Cache analytics and optimization
//! - Cost-aware caching decisions
//!
//! Caching benefits for AI applications:
//! - Significant cost reduction by avoiding duplicate API calls
//! - Improved response times for frequently requested content
//! - Better user experience with instant responses
//! - Reduced API rate limit pressure
//! - Offline capability for cached responses
//!
//! Run with: `cargo run --example caching_strategies`
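//!
//! Note: the API calls in this example are simulated in-process (see `make_api_call`),
//! so it runs without contacting the real `OpenAI` API.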

use openai_ergonomic::{Client, Config, Error, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tokio::time::sleep;
use tracing::{debug, info, warn};

/// Cache key for identifying unique requests
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CacheKey {
    /// Request endpoint
    endpoint: String,
    /// Request parameters hash
    params_hash: u64,
    /// Model name
    model: String,
    /// User ID for user-specific caching
    user_id: Option<String>,
}

impl CacheKey {
    /// Create a cache key from request parameters
    fn new(endpoint: &str, params: &ChatCompletionParams, user_id: Option<String>) -> Self {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        params.hash(&mut hasher);
        let params_hash = hasher.finish();

        Self {
            endpoint: endpoint.to_string(),
            params_hash,
            model: params.model.clone(),
            user_id,
        }
    }

    /// Generate a string representation for file-based caching
    fn to_string(&self) -> String {
        match &self.user_id {
            Some(user) => format!(
                "{}:{}:{}:{}",
                self.endpoint, self.model, self.params_hash, user
            ),
            None => format!("{}:{}:{}", self.endpoint, self.model, self.params_hash),
        }
    }
}

/// Cached response with metadata
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CachedResponse {
    /// The actual response content
    content: String,
    /// When the response was cached
    cached_at: u64,
    /// Time-to-live in seconds
    ttl_seconds: u64,
    /// Token usage information
    token_usage: TokenUsageInfo,
    /// Response metadata
    metadata: HashMap<String, String>,
    /// Number of times this cache entry has been accessed
    access_count: u64,
    /// Last access timestamp
    last_accessed: u64,
}

impl CachedResponse {
    /// Check if the cached response has expired
    fn is_expired(&self) -> bool {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        now > self.cached_at + self.ttl_seconds
    }

    /// Mark this cache entry as accessed
    fn mark_accessed(&mut self) {
        self.access_count += 1;
        self.last_accessed = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
    }

    /// Calculate age of the cached response
    fn age_seconds(&self) -> u64 {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        now.saturating_sub(self.cached_at)
    }
}

/// Token usage information for cost tracking
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TokenUsageInfo {
    prompt_tokens: i32,
    completion_tokens: i32,
    total_tokens: i32,
    estimated_cost_usd: f64,
}

/// Chat completion request parameters
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ChatCompletionParams {
    model: String,
    messages: Vec<ChatMessage>,
    temperature: Option<f64>,
    max_tokens: Option<i32>,
    top_p: Option<f64>,
    frequency_penalty: Option<f64>,
    presence_penalty: Option<f64>,
}

impl std::hash::Hash for ChatCompletionParams {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.model.hash(state);
        self.messages.hash(state);
        // Convert f64 to bits for hashing
        if let Some(temp) = self.temperature {
            temp.to_bits().hash(state);
        }
        self.max_tokens.hash(state);
        if let Some(top_p) = self.top_p {
            top_p.to_bits().hash(state);
        }
        if let Some(freq_penalty) = self.frequency_penalty {
            freq_penalty.to_bits().hash(state);
        }
        if let Some(pres_penalty) = self.presence_penalty {
            pres_penalty.to_bits().hash(state);
        }
    }
}

/// Chat message
#[derive(Debug, Clone, Hash, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

impl ChatMessage {
    fn user(content: &str) -> Self {
        Self {
            role: "user".to_string(),
            content: content.to_string(),
        }
    }

    fn system(content: &str) -> Self {
        Self {
            role: "system".to_string(),
            content: content.to_string(),
        }
    }

    fn assistant(content: &str) -> Self {
        Self {
            role: "assistant".to_string(),
            content: content.to_string(),
        }
    }
}

/// Cache statistics for monitoring and optimization
#[derive(Debug, Clone)]
struct CacheStats {
    /// Total cache requests
    total_requests: u64,
    /// Cache hits
    cache_hits: u64,
    /// Cache misses
    cache_misses: u64,
    /// Total entries in cache
    cache_size: u64,
    /// Total memory usage estimate
    memory_usage_bytes: u64,
    /// Cost savings from cache hits
    cost_savings_usd: f64,
    /// Time savings from cache hits
    time_savings_ms: u64,
}

impl CacheStats {
    fn new() -> Self {
        Self {
            total_requests: 0,
            cache_hits: 0,
            cache_misses: 0,
            cache_size: 0,
            memory_usage_bytes: 0,
            cost_savings_usd: 0.0,
            time_savings_ms: 0,
        }
    }

    fn hit_rate(&self) -> f64 {
        if self.total_requests == 0 {
            0.0
        } else {
            (self.cache_hits as f64 / self.total_requests as f64) * 100.0
        }
    }

    fn print_stats(&self) {
        info!("=== Cache Statistics ===");
        info!("Total requests: {}", self.total_requests);
        info!("Cache hits: {}", self.cache_hits);
        info!("Cache misses: {}", self.cache_misses);
        info!("Hit rate: {:.2}%", self.hit_rate());
        info!("Cache size: {} entries", self.cache_size);
        info!(
            "Memory usage: {:.2} KB",
            self.memory_usage_bytes as f64 / 1024.0
        );
        info!("Cost savings: ${:.4}", self.cost_savings_usd);
        info!("Time savings: {}ms", self.time_savings_ms);
    }
}

/// In-memory cache with TTL support
#[derive(Debug)]
struct MemoryCache {
    /// Cache storage
    cache: Arc<Mutex<HashMap<CacheKey, CachedResponse>>>,
    /// Cache statistics
    stats: Arc<Mutex<CacheStats>>,
    /// Default TTL for new entries
    default_ttl: Duration,
    /// Maximum cache size (number of entries)
    max_size: usize,
}

impl MemoryCache {
    /// Create a new memory cache
    fn new(default_ttl: Duration, max_size: usize) -> Self {
        Self {
            cache: Arc::new(Mutex::new(HashMap::new())),
            stats: Arc::new(Mutex::new(CacheStats::new())),
            default_ttl,
            max_size,
        }
    }

    /// Get a cached response
    async fn get(&self, key: &CacheKey) -> Option<String> {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        stats.total_requests += 1;

        if let Some(cached_response) = cache.get_mut(key) {
            if !cached_response.is_expired() {
                cached_response.mark_accessed();
                stats.cache_hits += 1;
                stats.cost_savings_usd += cached_response.token_usage.estimated_cost_usd;
                stats.time_savings_ms += 500; // Estimate 500ms saved per cache hit

                debug!("Cache hit for key: {}", key.to_string());
                return Some(cached_response.content.clone());
            }
            // Remove expired entry
            cache.remove(key);
            debug!("Removed expired cache entry for key: {}", key.to_string());
        }

        stats.cache_misses += 1;
        debug!("Cache miss for key: {}", key.to_string());
        None
    }

    /// Store a response in the cache
    async fn put(&self, key: CacheKey, content: String, token_usage: TokenUsageInfo) {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        // Evict entries if cache is full
        if cache.len() >= self.max_size {
            self.evict_lru(&mut cache);
        }

        let cached_response = CachedResponse {
            content,
            cached_at: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            ttl_seconds: self.default_ttl.as_secs(),
            token_usage,
            metadata: HashMap::new(),
            access_count: 0,
            last_accessed: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        };

        cache.insert(key.clone(), cached_response);
        stats.cache_size = cache.len() as u64;

        // Estimate memory usage
        let entry_size = key.to_string().len() + 1000; // Rough estimate
        stats.memory_usage_bytes += entry_size as u64;

        debug!("Cached response for key: {}", key.to_string());
    }

    /// Evict least recently used entries
    fn evict_lru(&self, cache: &mut HashMap<CacheKey, CachedResponse>) {
        // Find the entry with the oldest last_accessed time
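        // Only a single entry is evicted per insertion; the linear scan keeps this
        // example simple, though a production LRU cache would track access order
        // more efficiently (for example, with an ordered structure).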
        if let Some((lru_key, _)) = cache
            .iter()
            .min_by_key(|(_, response)| response.last_accessed)
            .map(|(k, v)| (k.clone(), v.clone()))
        {
            cache.remove(&lru_key);
            debug!("Evicted LRU entry: {}", lru_key.to_string());
        }
    }

    /// Clear expired entries
    async fn cleanup_expired(&self) {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        let initial_size = cache.len();
        cache.retain(|_, response| !response.is_expired());
        let removed_count = initial_size - cache.len();

        stats.cache_size = cache.len() as u64;

        if removed_count > 0 {
            info!("Cleaned up {} expired cache entries", removed_count);
        }
    }

    /// Get cache statistics
    fn get_stats(&self) -> CacheStats {
        self.stats.lock().unwrap().clone()
    }

    /// Clear all cache entries
    async fn clear(&self) {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        cache.clear();
        stats.cache_size = 0;
        stats.memory_usage_bytes = 0;

        info!("Cache cleared");
    }
}

/// File-based persistent cache
#[derive(Debug)]
struct FileCache {
    /// Cache directory
    cache_dir: PathBuf,
    /// In-memory index for faster lookups
    index: Arc<Mutex<HashMap<CacheKey, PathBuf>>>,
    /// Statistics
    stats: Arc<Mutex<CacheStats>>,
    /// Default TTL
    default_ttl: Duration,
}

impl FileCache {
    /// Create a new file-based cache
    fn new(cache_dir: &Path, default_ttl: Duration) -> Result<Self> {
        fs::create_dir_all(cache_dir).map_err(|e| {
            Error::InvalidRequest(format!("Failed to create cache directory: {}", e))
        })?;

        let cache = Self {
            cache_dir: cache_dir.to_path_buf(),
            index: Arc::new(Mutex::new(HashMap::new())),
            stats: Arc::new(Mutex::new(CacheStats::new())),
            default_ttl,
        };

        // Build index from existing files
        cache.rebuild_index()?;

        Ok(cache)
    }

    /// Rebuild the in-memory index from disk
    fn rebuild_index(&self) -> Result<()> {
        let mut index = self.index.lock().unwrap();
        index.clear();

        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
            for entry in entries.flatten() {
                if entry.path().extension().and_then(|s| s.to_str()) == Some("json") {
                    if let Some(stem) = entry.path().file_stem().and_then(|s| s.to_str()) {
                        // Parse cache key from filename
                        let parts: Vec<&str> = stem.split(':').collect();
                        if parts.len() >= 3 {
                            let key = CacheKey {
                                endpoint: parts[0].to_string(),
                                model: parts[1].to_string(),
                                params_hash: parts[2].parse().unwrap_or(0),
                                user_id: parts.get(3).map(|s| s.to_string()),
                            };
                            index.insert(key, entry.path());
                        }
                    }
                }
            }
        }

        let mut stats = self.stats.lock().unwrap();
        stats.cache_size = index.len() as u64;

        info!("Rebuilt cache index with {} entries", index.len());
        Ok(())
    }

    /// Get a cached response from disk
    async fn get(&self, key: &CacheKey) -> Option<String> {
        let index = self.index.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        stats.total_requests += 1;

        if let Some(file_path) = index.get(key) {
            if let Ok(content) = fs::read_to_string(file_path) {
                if let Ok(cached_response) = serde_json::from_str::<CachedResponse>(&content) {
                    if !cached_response.is_expired() {
                        stats.cache_hits += 1;
                        stats.cost_savings_usd += cached_response.token_usage.estimated_cost_usd;
                        stats.time_savings_ms += 200; // File access is faster than API

                        debug!("File cache hit for key: {}", key.to_string());
                        return Some(cached_response.content);
                    }
                    // Remove expired file
                    let _ = fs::remove_file(file_path);
                    debug!("Removed expired cache file: {:?}", file_path);
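                    // The stale index entry is left in place; subsequent lookups for
                    // this key simply miss until the index is next rebuilt.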
                }
            }
        }

        stats.cache_misses += 1;
        debug!("File cache miss for key: {}", key.to_string());
        None
    }

    /// Store a response in the file cache
    async fn put(&self, key: CacheKey, content: String, token_usage: TokenUsageInfo) -> Result<()> {
        let cached_response = CachedResponse {
            content,
            cached_at: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            ttl_seconds: self.default_ttl.as_secs(),
            token_usage,
            metadata: HashMap::new(),
            access_count: 0,
            last_accessed: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        };

        let filename = format!("{}.json", key.to_string());
        let file_path = self.cache_dir.join(filename);
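        // Note: the ':'-separated key works as a file name on Unix, but ':' is not
        // allowed in Windows file names; a portable cache would use a filesystem-safe
        // encoding of the key (for example, a hex digest).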

        let json_content = serde_json::to_string_pretty(&cached_response).map_err(|e| {
            Error::InvalidRequest(format!("Failed to serialize cache entry: {}", e))
        })?;

        fs::write(&file_path, json_content)
            .map_err(|e| Error::InvalidRequest(format!("Failed to write cache file: {}", e)))?;

        // Update index
        let mut index = self.index.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        index.insert(key.clone(), file_path);
        stats.cache_size = index.len() as u64;

        debug!("Stored cache entry to file: {}", key.to_string());
        Ok(())
    }

    /// Clean up expired cache files
    async fn cleanup_expired(&self) -> Result<()> {
        let index = self.index.lock().unwrap();
        let mut removed_count = 0;

        for (key, file_path) in index.iter() {
            if let Ok(content) = fs::read_to_string(file_path) {
                if let Ok(cached_response) = serde_json::from_str::<CachedResponse>(&content) {
                    if cached_response.is_expired() {
                        if fs::remove_file(file_path).is_ok() {
                            removed_count += 1;
                            debug!("Removed expired cache file: {}", key.to_string());
                        }
                    }
                }
            }
        }

        if removed_count > 0 {
            info!("Cleaned up {} expired file cache entries", removed_count);
            // Rebuild index to reflect deletions
            drop(index);
            self.rebuild_index()?;
        }

        Ok(())
    }

    /// Get cache statistics
    fn get_stats(&self) -> CacheStats {
        self.stats.lock().unwrap().clone()
    }
}

/// Smart caching client that combines multiple cache layers
#[derive(Debug)]
struct CachingClient {
    client: Client,
    memory_cache: MemoryCache,
    file_cache: Option<FileCache>,
    cache_strategy: CacheStrategy,
}

/// Cache strategy configuration
#[derive(Debug, Clone)]
struct CacheStrategy {
    /// Whether to cache all responses or use selective caching
    cache_all: bool,
    /// Minimum response length to cache (avoid caching very short responses)
    min_response_length: usize,
    /// Cache deterministic requests (temperature = 0)
    cache_deterministic: bool,
    /// Cache expensive requests (high token count)
    cache_expensive: bool,
    /// Minimum cost threshold for caching
    min_cost_threshold: f64,
    /// Whether to enable cache warming
    enable_warming: bool,
}

impl Default for CacheStrategy {
    fn default() -> Self {
        Self {
            cache_all: false,
            min_response_length: 50,
            cache_deterministic: true,
            cache_expensive: true,
            min_cost_threshold: 0.001, // $0.001
            enable_warming: false,
        }
    }
}

impl CachingClient {
    /// Create a new caching client
    fn new(client: Client, cache_dir: Option<&Path>) -> Result<Self> {
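        // Memory cache: one-hour TTL, capped at 1,000 entries. The optional file
        // cache below uses a longer 24-hour TTL, trading disk space for persistence
        // across runs.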
        let memory_cache = MemoryCache::new(Duration::from_secs(60 * 60), 1000);

        let file_cache = if let Some(dir) = cache_dir {
            Some(FileCache::new(dir, Duration::from_secs(24 * 60 * 60))?)
        } else {
            None
        };

        Ok(Self {
            client,
            memory_cache,
            file_cache,
            cache_strategy: CacheStrategy::default(),
        })
    }

    /// Configure caching strategy
    fn with_strategy(mut self, strategy: CacheStrategy) -> Self {
        self.cache_strategy = strategy;
        self
    }

    /// Send a chat completion request with caching
    async fn chat_completion(
        &self,
        params: ChatCompletionParams,
        user_id: Option<String>,
    ) -> Result<String> {
        let cache_key = CacheKey::new("/v1/chat/completions", &params, user_id);

        // Try memory cache first
        if let Some(cached_content) = self.memory_cache.get(&cache_key).await {
            debug!("Retrieved from memory cache");
            return Ok(cached_content);
        }

        // Try file cache second
        if let Some(file_cache) = &self.file_cache {
            if let Some(cached_content) = file_cache.get(&cache_key).await {
                debug!("Retrieved from file cache, promoting to memory cache");

                // Promote to memory cache for faster future access
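                // `FileCache::get` returns only the content, so token/cost figures
                // are unknown here; zeros also keep the memory-cache stats from
                // double-counting savings already recorded by the file-cache hit.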
                let token_usage = TokenUsageInfo {
                    prompt_tokens: 0,
                    completion_tokens: 0,
                    total_tokens: 0,
                    estimated_cost_usd: 0.0,
                };
                self.memory_cache
                    .put(cache_key, cached_content.clone(), token_usage)
                    .await;

                return Ok(cached_content);
            }
        }

        // Cache miss - make actual API call
        let response = self.make_api_call(&params).await?;

        // Calculate token usage and cost
        let token_usage = self.estimate_token_usage(&params, &response);

        // Decide whether to cache based on strategy
        if self.should_cache(&params, &response, &token_usage) {
            // Store in memory cache
            self.memory_cache
                .put(cache_key.clone(), response.clone(), token_usage.clone())
                .await;

            // Store in file cache if available
            if let Some(file_cache) = &self.file_cache {
                if let Err(e) = file_cache
                    .put(cache_key, response.clone(), token_usage)
                    .await
                {
                    warn!("Failed to store in file cache: {}", e);
                }
            }
        }

        Ok(response)
    }

    /// Make the actual API call (simulated)
    async fn make_api_call(&self, params: &ChatCompletionParams) -> Result<String> {
        // Simulate API call delay
        sleep(Duration::from_millis(500)).await;

        // Simulate API response based on parameters
        let response = match params.messages.first() {
            Some(msg) if msg.content.contains("error") => {
                return Err(Error::InvalidRequest("Simulated API error".to_string()));
            }
            Some(msg) => {
                format!(
                    "Response to: {}",
                    msg.content.chars().take(50).collect::<String>()
                )
            }
            None => "Empty response".to_string(),
        };

        Ok(response)
    }

    /// Estimate token usage and cost for a request/response pair
    fn estimate_token_usage(
        &self,
        params: &ChatCompletionParams,
        response: &str,
    ) -> TokenUsageInfo {
        // Rough token estimation (1 token ≈ 4 characters for English)
        let prompt_text: String = params
            .messages
            .iter()
            .map(|m| m.content.clone())
            .collect::<Vec<_>>()
            .join(" ");

        let prompt_tokens = (prompt_text.len() / 4) as i32;
        let completion_tokens = (response.len() / 4) as i32;
        let total_tokens = prompt_tokens + completion_tokens;

        // Estimate cost based on model (simplified)
        let cost_per_1k_tokens = match params.model.as_str() {
            "gpt-4" => 0.03,
            "gpt-3.5-turbo" => 0.002,
            _ => 0.002,
        };

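        // Worked example: a 400-character prompt is ~100 prompt tokens; with a
        // ~40-character reply (~10 tokens) on gpt-3.5-turbo the estimate below is
        // roughly (110 / 1000) * $0.002 ≈ $0.00022.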
        let estimated_cost_usd = (total_tokens as f64 / 1000.0) * cost_per_1k_tokens;

        TokenUsageInfo {
            prompt_tokens,
            completion_tokens,
            total_tokens,
            estimated_cost_usd,
        }
    }

    /// Determine whether to cache a response based on strategy
    fn should_cache(
        &self,
        params: &ChatCompletionParams,
        response: &str,
        token_usage: &TokenUsageInfo,
    ) -> bool {
        if self.cache_strategy.cache_all {
            return true;
        }

        // Check minimum response length
        if response.len() < self.cache_strategy.min_response_length {
            return false;
        }

        // Check deterministic requests
        if self.cache_strategy.cache_deterministic {
            if let Some(temp) = params.temperature {
                if temp == 0.0 {
                    return true;
                }
            }
        }

        // Check expensive requests
        if self.cache_strategy.cache_expensive {
            if token_usage.estimated_cost_usd >= self.cache_strategy.min_cost_threshold {
                return true;
            }
        }

        false
    }

    /// Warm the cache with common requests
    async fn warm_cache(&self, common_prompts: Vec<ChatCompletionParams>) -> Result<()> {
        if !self.cache_strategy.enable_warming {
            return Ok(());
        }

        info!(
            "Starting cache warming with {} prompts",
            common_prompts.len()
        );

        for (i, params) in common_prompts.iter().enumerate() {
            info!("Warming cache {}/{}", i + 1, common_prompts.len());

            match self.chat_completion(params.clone(), None).await {
                Ok(_) => debug!("Cache warmed for prompt {}", i + 1),
                Err(e) => warn!("Failed to warm cache for prompt {}: {}", i + 1, e),
            }

            // Small delay to avoid overwhelming the API
            sleep(Duration::from_millis(100)).await;
        }

        info!("Cache warming completed");
        Ok(())
    }

    /// Get combined cache statistics
    fn get_cache_stats(&self) -> (CacheStats, Option<CacheStats>) {
        let memory_stats = self.memory_cache.get_stats();
        let file_stats = self.file_cache.as_ref().map(|cache| cache.get_stats());

        (memory_stats, file_stats)
    }

    /// Clean up expired entries in all caches
    async fn cleanup_expired(&self) -> Result<()> {
        self.memory_cache.cleanup_expired().await;

        if let Some(file_cache) = &self.file_cache {
            file_cache.cleanup_expired().await?;
        }

        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    // Initialize logging
    tracing_subscriber::fmt()
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
        )
        .init();

    info!("Starting caching strategies example");

    // Create a test client
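    // (The API call itself is simulated in `make_api_call`, so the placeholder key
    // below is never sent anywhere.)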
    let config = Config::builder().api_key("test-api-key").build();
    let client = Client::builder(config)?.build();

    // Example 1: Basic memory caching
    info!("=== Example 1: Memory Caching ===");

    let cache_dir = std::env::temp_dir().join("openai_cache");
    let caching_client =
        CachingClient::new(client, Some(&cache_dir))?.with_strategy(CacheStrategy {
            cache_all: true,
            min_response_length: 10,
            cache_deterministic: true,
            cache_expensive: true,
            min_cost_threshold: 0.0,
            enable_warming: false,
        });

    // Test the same request multiple times to demonstrate caching
    let test_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![
            ChatMessage::system("You are a helpful assistant."),
            ChatMessage::user("What is the capital of France?"),
        ],
        temperature: Some(0.0), // Deterministic for caching
        max_tokens: Some(100),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    info!("Making first request (should be cache miss)");
    let start_time = Instant::now();
    let response1 = caching_client
        .chat_completion(test_params.clone(), None)
        .await?;
    let first_duration = start_time.elapsed();
    info!("First response: {} (took {:?})", response1, first_duration);

    info!("Making second request (should be cache hit)");
    let start_time = Instant::now();
    let response2 = caching_client
        .chat_completion(test_params.clone(), None)
        .await?;
    let second_duration = start_time.elapsed();
    info!(
        "Second response: {} (took {:?})",
        response2, second_duration
    );

    // Verify responses are identical and second is faster
    assert_eq!(response1, response2);
    info!(
        "Cache working: responses identical, second request {:?} faster",
        first_duration.saturating_sub(second_duration)
    );

    // Example 2: User-specific caching
    info!("\n=== Example 2: User-Specific Caching ===");

    let user_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![ChatMessage::user(
            "What is my favorite programming language?",
        )],
        temperature: Some(0.7),
        max_tokens: Some(50),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    // Same request for different users should be cached separately
    let user1_response = caching_client
        .chat_completion(user_params.clone(), Some("user1".to_string()))
        .await?;
    let user2_response = caching_client
        .chat_completion(user_params.clone(), Some("user2".to_string()))
        .await?;

    info!("User 1 response: {}", user1_response);
    info!("User 2 response: {}", user2_response);

    // Test cache hit for user 1
    let user1_cached = caching_client
        .chat_completion(user_params, Some("user1".to_string()))
        .await?;
    assert_eq!(user1_response, user1_cached);
    info!("User-specific caching working");

    // Example 3: Conditional caching based on parameters
    info!("\n=== Example 3: Conditional Caching ===");

    let conditional_client = CachingClient::new(
        Client::builder(Config::builder().api_key("test-api-key").build())?.build(),
        Some(&cache_dir),
    )?
    .with_strategy(CacheStrategy {
        cache_all: false,
        min_response_length: 20,
        cache_deterministic: true,
        cache_expensive: true,
        min_cost_threshold: 0.001,
        enable_warming: false,
    });

    // Test deterministic request (should be cached)
    let deterministic_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![ChatMessage::user("Count from 1 to 5")],
        temperature: Some(0.0), // Deterministic
        max_tokens: Some(50),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    info!("Testing deterministic request (should cache)");
    conditional_client
        .chat_completion(deterministic_params.clone(), None)
        .await?;
    conditional_client
        .chat_completion(deterministic_params, None)
        .await?;

    // Test non-deterministic request (might not be cached based on strategy)
    let creative_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![ChatMessage::user("Write a creative story")],
        temperature: Some(1.0), // Creative
        max_tokens: Some(50),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    info!("Testing creative request (might not cache)");
    conditional_client
        .chat_completion(creative_params.clone(), None)
        .await?;
    conditional_client
        .chat_completion(creative_params, None)
        .await?;

    // Example 4: Cache warming
    info!("\n=== Example 4: Cache Warming ===");

    let warming_client = CachingClient::new(
        Client::builder(Config::builder().api_key("test-api-key").build())?.build(),
        Some(&cache_dir),
    )?
    .with_strategy(CacheStrategy {
        cache_all: true,
        enable_warming: true,
        ..Default::default()
    });

    // Define common prompts for warming
    let common_prompts = vec![
        ChatCompletionParams {
            model: "gpt-3.5-turbo".to_string(),
            messages: vec![ChatMessage::user("What is machine learning?")],
            temperature: Some(0.0),
            max_tokens: Some(100),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
        },
        ChatCompletionParams {
            model: "gpt-3.5-turbo".to_string(),
            messages: vec![ChatMessage::user("Explain quantum computing")],
            temperature: Some(0.0),
            max_tokens: Some(100),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
        },
        ChatCompletionParams {
            model: "gpt-3.5-turbo".to_string(),
            messages: vec![ChatMessage::user("What is blockchain?")],
            temperature: Some(0.0),
            max_tokens: Some(100),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
        },
    ];

    warming_client.warm_cache(common_prompts.clone()).await?;

    // Test that warmed entries are available
    info!("Testing warmed cache entries");
    for (i, params) in common_prompts.iter().enumerate() {
        let start_time = Instant::now();
        let response = warming_client.chat_completion(params.clone(), None).await?;
        let duration = start_time.elapsed();
        info!(
            "Warmed entry {} retrieved in {:?}: {}",
            i + 1,
            duration,
            response
        );
    }

    // Example 5: Cache analytics and optimization
    info!("\n=== Example 5: Cache Analytics ===");

    let (memory_stats, file_stats) = caching_client.get_cache_stats();

    info!("Memory cache statistics:");
    memory_stats.print_stats();

    if let Some(file_stats) = file_stats {
        info!("\nFile cache statistics:");
        file_stats.print_stats();
    }

    // Example 6: Cache cleanup and maintenance
    info!("\n=== Example 6: Cache Maintenance ===");

    info!("Performing cache cleanup");
    caching_client.cleanup_expired().await?;

    // Show updated statistics
    let (memory_stats_after, file_stats_after) = caching_client.get_cache_stats();
    info!("Statistics after cleanup:");
    memory_stats_after.print_stats();

    if let Some(ref file_stats_after) = file_stats_after {
        info!("\nFile cache after cleanup:");
        file_stats_after.print_stats();
    }

    // Example 7: Cost analysis
    info!("\n=== Example 7: Cost Analysis ===");

    let total_cost_savings = memory_stats_after.cost_savings_usd
        + file_stats_after
            .as_ref()
            .map(|s| s.cost_savings_usd)
            .unwrap_or(0.0);

    let total_time_savings = memory_stats_after.time_savings_ms
        + file_stats_after
            .as_ref()
            .map(|s| s.time_savings_ms)
            .unwrap_or(0);

    info!("=== Overall Cache Performance ===");
    info!("Total cost savings: ${:.4}", total_cost_savings);
    info!("Total time savings: {}ms", total_time_savings);
    info!("Cache efficiency: Significant improvement in response times and cost reduction");

    // Cleanup
    info!("Cleaning up cache directory");
    if cache_dir.exists() {
        fs::remove_dir_all(&cache_dir)
            .map_err(|e| Error::InvalidRequest(format!("Cleanup failed: {}", e)))?;
    }

    info!("Caching strategies example completed successfully!");
    Ok(())
}