#![allow(dead_code)]
#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::cast_possible_wrap)]
#![allow(clippy::cast_lossless)]
#![allow(clippy::match_same_arms)]
#![allow(clippy::collapsible_if)]
#![allow(clippy::redundant_closure_for_method_calls)]
#![allow(clippy::too_many_lines)]
#![allow(clippy::missing_const_for_fn)]
#![allow(clippy::uninlined_format_args)]
#![allow(clippy::unused_async)]
#![allow(clippy::unreadable_literal)]
#![allow(clippy::map_unwrap_or)]
#![allow(clippy::struct_excessive_bools)]
#![allow(clippy::unused_self)]
#![allow(clippy::significant_drop_tightening)]
#![allow(clippy::inherent_to_string)]
#![allow(clippy::option_if_let_else)]
#![allow(clippy::cast_precision_loss)]
#![allow(clippy::unnecessary_wraps)]
#![allow(clippy::inefficient_to_string)]
use openai_ergonomic::{Client, Config, Error, Result};
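// This example demonstrates response-caching strategies for OpenAI API
// calls: an in-memory cache with TTL and LRU eviction, a file-backed cache
// for persistence across runs, user-scoped cache keys, conditional caching
// policies, cache warming, and hit-rate/cost analytics.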
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tokio::time::sleep;
use tracing::{debug, info, warn};

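/// Cache key uniquely identifying a request by endpoint, model, hashed
/// request parameters, and optional user ID.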
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CacheKey {
    endpoint: String,
    params_hash: u64,
    model: String,
    user_id: Option<String>,
}

impl CacheKey {
    fn new(endpoint: &str, params: &ChatCompletionParams, user_id: Option<String>) -> Self {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        params.hash(&mut hasher);
        let params_hash = hasher.finish();

        Self {
            endpoint: endpoint.to_string(),
            params_hash,
            model: params.model.clone(),
            user_id,
        }
    }

    fn to_string(&self) -> String {
        match &self.user_id {
            Some(user) => format!(
                "{}:{}:{}:{}",
                self.endpoint, self.model, self.params_hash, user
            ),
            None => format!("{}:{}:{}", self.endpoint, self.model, self.params_hash),
        }
    }
}

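/// A cached response plus expiry, token-usage, and access metadata.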
#[derive(Debug, Clone, Serialize, Deserialize)]
struct CachedResponse {
    content: String,
    cached_at: u64,
    ttl_seconds: u64,
    token_usage: TokenUsageInfo,
    metadata: HashMap<String, String>,
    access_count: u64,
    last_accessed: u64,
}

impl CachedResponse {
    fn is_expired(&self) -> bool {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        now > self.cached_at + self.ttl_seconds
    }

    fn mark_accessed(&mut self) {
        self.access_count += 1;
        self.last_accessed = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
    }

    fn age_seconds(&self) -> u64 {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        now.saturating_sub(self.cached_at)
    }
}

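/// Token counts and estimated cost for a single request/response pair.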
#[derive(Debug, Clone, Serialize, Deserialize)]
struct TokenUsageInfo {
    prompt_tokens: i32,
    completion_tokens: i32,
    total_tokens: i32,
    estimated_cost_usd: f64,
}

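/// Chat completion request parameters; hashed to build the cache key.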
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ChatCompletionParams {
    model: String,
    messages: Vec<ChatMessage>,
    temperature: Option<f64>,
    max_tokens: Option<i32>,
    top_p: Option<f64>,
    frequency_penalty: Option<f64>,
    presence_penalty: Option<f64>,
}

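// Manual `Hash` implementation: `f64` does not implement `Hash`, so the
// optional float parameters are hashed via their bit patterns (`to_bits`).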
impl std::hash::Hash for ChatCompletionParams {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.model.hash(state);
        self.messages.hash(state);
        if let Some(temp) = self.temperature {
            temp.to_bits().hash(state);
        }
        self.max_tokens.hash(state);
        if let Some(top_p) = self.top_p {
            top_p.to_bits().hash(state);
        }
        if let Some(freq_penalty) = self.frequency_penalty {
            freq_penalty.to_bits().hash(state);
        }
        if let Some(pres_penalty) = self.presence_penalty {
            pres_penalty.to_bits().hash(state);
        }
    }
}

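/// A single chat message with a role ("system", "user", or "assistant").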
#[derive(Debug, Clone, Hash, Serialize, Deserialize)]
struct ChatMessage {
    role: String,
    content: String,
}

impl ChatMessage {
    fn user(content: &str) -> Self {
        Self {
            role: "user".to_string(),
            content: content.to_string(),
        }
    }

    fn system(content: &str) -> Self {
        Self {
            role: "system".to_string(),
            content: content.to_string(),
        }
    }

    fn assistant(content: &str) -> Self {
        Self {
            role: "assistant".to_string(),
            content: content.to_string(),
        }
    }
}

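/// Aggregated cache metrics: hit rate, size, and estimated savings.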
#[derive(Debug, Clone)]
struct CacheStats {
    total_requests: u64,
    cache_hits: u64,
    cache_misses: u64,
    cache_size: u64,
    memory_usage_bytes: u64,
    cost_savings_usd: f64,
    time_savings_ms: u64,
}

impl CacheStats {
    fn new() -> Self {
        Self {
            total_requests: 0,
            cache_hits: 0,
            cache_misses: 0,
            cache_size: 0,
            memory_usage_bytes: 0,
            cost_savings_usd: 0.0,
            time_savings_ms: 0,
        }
    }

    fn hit_rate(&self) -> f64 {
        if self.total_requests == 0 {
            0.0
        } else {
            (self.cache_hits as f64 / self.total_requests as f64) * 100.0
        }
    }

    fn print_stats(&self) {
        info!("=== Cache Statistics ===");
        info!("Total requests: {}", self.total_requests);
        info!("Cache hits: {}", self.cache_hits);
        info!("Cache misses: {}", self.cache_misses);
        info!("Hit rate: {:.2}%", self.hit_rate());
        info!("Cache size: {} entries", self.cache_size);
        info!(
            "Memory usage: {:.2} KB",
            self.memory_usage_bytes as f64 / 1024.0
        );
        info!("Cost savings: ${:.4}", self.cost_savings_usd);
        info!("Time savings: {}ms", self.time_savings_ms);
    }
}

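/// In-memory cache with TTL expiry, LRU eviction, and shared statistics.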
#[derive(Debug)]
struct MemoryCache {
    cache: Arc<Mutex<HashMap<CacheKey, CachedResponse>>>,
    stats: Arc<Mutex<CacheStats>>,
    default_ttl: Duration,
    max_size: usize,
}

impl MemoryCache {
    fn new(default_ttl: Duration, max_size: usize) -> Self {
        Self {
            cache: Arc::new(Mutex::new(HashMap::new())),
            stats: Arc::new(Mutex::new(CacheStats::new())),
            default_ttl,
            max_size,
        }
    }

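    /// Returns the cached content for `key` if present and unexpired,
    /// updating hit/miss statistics and dropping expired entries on the way.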
    async fn get(&self, key: &CacheKey) -> Option<String> {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        stats.total_requests += 1;

        if let Some(cached_response) = cache.get_mut(key) {
            if !cached_response.is_expired() {
                cached_response.mark_accessed();
                stats.cache_hits += 1;
                stats.cost_savings_usd += cached_response.token_usage.estimated_cost_usd;
                // Rough estimate: a memory hit saves ~500ms of API latency.
                stats.time_savings_ms += 500;
                debug!("Cache hit for key: {}", key.to_string());
                return Some(cached_response.content.clone());
            }
            cache.remove(key);
            debug!("Removed expired cache entry for key: {}", key.to_string());
        }

        stats.cache_misses += 1;
        debug!("Cache miss for key: {}", key.to_string());
        None
    }

    async fn put(&self, key: CacheKey, content: String, token_usage: TokenUsageInfo) {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        if cache.len() >= self.max_size {
            self.evict_lru(&mut cache);
        }

        let cached_response = CachedResponse {
            content,
            cached_at: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            ttl_seconds: self.default_ttl.as_secs(),
            token_usage,
            metadata: HashMap::new(),
            access_count: 0,
            last_accessed: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        };

        cache.insert(key.clone(), cached_response);
        stats.cache_size = cache.len() as u64;

        // Rough size estimate: key string plus ~1000 bytes per cached entry.
        let entry_size = key.to_string().len() + 1000;
        stats.memory_usage_bytes += entry_size as u64;

        debug!("Cached response for key: {}", key.to_string());
    }

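    /// Evicts the entry with the oldest `last_accessed` timestamp.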
    fn evict_lru(&self, cache: &mut HashMap<CacheKey, CachedResponse>) {
        if let Some((lru_key, _)) = cache
            .iter()
            .min_by_key(|(_, response)| response.last_accessed)
            .map(|(k, v)| (k.clone(), v.clone()))
        {
            cache.remove(&lru_key);
            debug!("Evicted LRU entry: {}", lru_key.to_string());
        }
    }

    async fn cleanup_expired(&self) {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        let initial_size = cache.len();
        cache.retain(|_, response| !response.is_expired());
        let removed_count = initial_size - cache.len();

        stats.cache_size = cache.len() as u64;

        if removed_count > 0 {
            info!("Cleaned up {} expired cache entries", removed_count);
        }
    }

    fn get_stats(&self) -> CacheStats {
        self.stats.lock().unwrap().clone()
    }

    async fn clear(&self) {
        let mut cache = self.cache.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        cache.clear();
        stats.cache_size = 0;
        stats.memory_usage_bytes = 0;

        info!("Cache cleared");
    }
}

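/// File-backed cache that persists entries as JSON files and keeps an
/// in-memory index, rebuilt from the cache directory on startup.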
#[derive(Debug)]
struct FileCache {
    cache_dir: PathBuf,
    index: Arc<Mutex<HashMap<CacheKey, PathBuf>>>,
    stats: Arc<Mutex<CacheStats>>,
    default_ttl: Duration,
}

impl FileCache {
    fn new(cache_dir: &Path, default_ttl: Duration) -> Result<Self> {
        fs::create_dir_all(cache_dir).map_err(|e| {
            Error::InvalidRequest(format!("Failed to create cache directory: {}", e))
        })?;

        let cache = Self {
            cache_dir: cache_dir.to_path_buf(),
            index: Arc::new(Mutex::new(HashMap::new())),
            stats: Arc::new(Mutex::new(CacheStats::new())),
            default_ttl,
        };

        cache.rebuild_index()?;

        Ok(cache)
    }

    fn rebuild_index(&self) -> Result<()> {
        let mut index = self.index.lock().unwrap();
        index.clear();

        if let Ok(entries) = fs::read_dir(&self.cache_dir) {
            for entry in entries.flatten() {
                if entry.path().extension().and_then(|s| s.to_str()) == Some("json") {
                    if let Some(stem) = entry.path().file_stem().and_then(|s| s.to_str()) {
                        let parts: Vec<&str> = stem.split(':').collect();
                        if parts.len() >= 3 {
                            let key = CacheKey {
                                // Reverse the filename sanitization applied in
                                // `put`: `/` in the endpoint was replaced by `_`.
                                endpoint: parts[0].replace('_', "/"),
                                model: parts[1].to_string(),
                                params_hash: parts[2].parse().unwrap_or(0),
                                user_id: parts.get(3).map(|s| s.to_string()),
                            };
                            index.insert(key, entry.path());
                        }
                    }
                }
            }
        }

        let mut stats = self.stats.lock().unwrap();
        stats.cache_size = index.len() as u64;

        info!("Rebuilt cache index with {} entries", index.len());
        Ok(())
    }

    async fn get(&self, key: &CacheKey) -> Option<String> {
        let index = self.index.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        stats.total_requests += 1;

        if let Some(file_path) = index.get(key) {
            if let Ok(content) = fs::read_to_string(file_path) {
                if let Ok(cached_response) = serde_json::from_str::<CachedResponse>(&content) {
                    if !cached_response.is_expired() {
                        stats.cache_hits += 1;
                        stats.cost_savings_usd += cached_response.token_usage.estimated_cost_usd;
                        // File reads are slower than memory hits, so credit
                        // less saved latency.
                        stats.time_savings_ms += 200;
                        debug!("File cache hit for key: {}", key.to_string());
                        return Some(cached_response.content);
                    }
                    let _ = fs::remove_file(file_path);
                    debug!("Removed expired cache file: {:?}", file_path);
                }
            }
        }

        stats.cache_misses += 1;
        debug!("File cache miss for key: {}", key.to_string());
        None
    }

    async fn put(&self, key: CacheKey, content: String, token_usage: TokenUsageInfo) -> Result<()> {
        let cached_response = CachedResponse {
            content,
            cached_at: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
            ttl_seconds: self.default_ttl.as_secs(),
            token_usage,
            metadata: HashMap::new(),
            access_count: 0,
            last_accessed: SystemTime::now()
                .duration_since(UNIX_EPOCH)
                .unwrap()
                .as_secs(),
        };

        // Sanitize the key for use as a filename: the endpoint contains `/`,
        // which would otherwise be interpreted as a path separator.
        let filename = format!("{}.json", key.to_string().replace('/', "_"));
        let file_path = self.cache_dir.join(filename);

        let json_content = serde_json::to_string_pretty(&cached_response).map_err(|e| {
            Error::InvalidRequest(format!("Failed to serialize cache entry: {}", e))
        })?;

        fs::write(&file_path, json_content)
            .map_err(|e| Error::InvalidRequest(format!("Failed to write cache file: {}", e)))?;

        let mut index = self.index.lock().unwrap();
        let mut stats = self.stats.lock().unwrap();

        index.insert(key.clone(), file_path);
        stats.cache_size = index.len() as u64;

        debug!("Stored cache entry to file: {}", key.to_string());
        Ok(())
    }

    async fn cleanup_expired(&self) -> Result<()> {
        let index = self.index.lock().unwrap();
        let mut removed_count = 0;

        for (key, file_path) in index.iter() {
            if let Ok(content) = fs::read_to_string(file_path) {
                if let Ok(cached_response) = serde_json::from_str::<CachedResponse>(&content) {
                    if cached_response.is_expired() {
                        if fs::remove_file(file_path).is_ok() {
                            removed_count += 1;
                            debug!("Removed expired cache file: {}", key.to_string());
                        }
                    }
                }
            }
        }

        if removed_count > 0 {
            info!("Cleaned up {} expired file cache entries", removed_count);
            // Release the lock before rebuilding, which re-acquires it.
            drop(index);
            self.rebuild_index()?;
        }

        Ok(())
    }

    fn get_stats(&self) -> CacheStats {
        self.stats.lock().unwrap().clone()
    }
}

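/// Client wrapper that layers a two-tier cache (memory first, then file)
/// in front of chat completion calls.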
#[derive(Debug)]
struct CachingClient {
    client: Client,
    memory_cache: MemoryCache,
    file_cache: Option<FileCache>,
    cache_strategy: CacheStrategy,
}

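/// Policy knobs controlling which responses are worth caching.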
#[derive(Debug, Clone)]
struct CacheStrategy {
    cache_all: bool,
    min_response_length: usize,
    cache_deterministic: bool,
    cache_expensive: bool,
    min_cost_threshold: f64,
    enable_warming: bool,
}

impl Default for CacheStrategy {
    fn default() -> Self {
        Self {
            cache_all: false,
            min_response_length: 50,
            cache_deterministic: true,
            cache_expensive: true,
            // Cache anything estimated to cost at least a tenth of a cent.
            min_cost_threshold: 0.001,
            enable_warming: false,
        }
    }
}

impl CachingClient {
    fn new(client: Client, cache_dir: Option<&Path>) -> Result<Self> {
        // Memory cache: 1-hour TTL, at most 1000 entries.
        let memory_cache = MemoryCache::new(Duration::from_secs(60 * 60), 1000);

        // Optional file cache: 24-hour TTL.
        let file_cache = if let Some(dir) = cache_dir {
            Some(FileCache::new(dir, Duration::from_secs(24 * 60 * 60))?)
        } else {
            None
        };

        Ok(Self {
            client,
            memory_cache,
            file_cache,
            cache_strategy: CacheStrategy::default(),
        })
    }

    fn with_strategy(mut self, strategy: CacheStrategy) -> Self {
        self.cache_strategy = strategy;
        self
    }

    async fn chat_completion(
        &self,
        params: ChatCompletionParams,
        user_id: Option<String>,
    ) -> Result<String> {
        let cache_key = CacheKey::new("/v1/chat/completions", &params, user_id);

        // Tier 1: memory cache.
        if let Some(cached_content) = self.memory_cache.get(&cache_key).await {
            debug!("Retrieved from memory cache");
            return Ok(cached_content);
        }

        // Tier 2: file cache; on a hit, promote the entry to the memory cache.
        if let Some(file_cache) = &self.file_cache {
            if let Some(cached_content) = file_cache.get(&cache_key).await {
                debug!("Retrieved from file cache, promoting to memory cache");

                // Usage is unknown when promoting, so record zeroes.
                let token_usage = TokenUsageInfo {
                    prompt_tokens: 0,
                    completion_tokens: 0,
                    total_tokens: 0,
                    estimated_cost_usd: 0.0,
                };
                self.memory_cache
                    .put(cache_key, cached_content.clone(), token_usage)
                    .await;

                return Ok(cached_content);
            }
        }

        // Miss on both tiers: call the API.
        let response = self.make_api_call(&params).await?;

        let token_usage = self.estimate_token_usage(&params, &response);

        if self.should_cache(&params, &response, &token_usage) {
            self.memory_cache
                .put(cache_key.clone(), response.clone(), token_usage.clone())
                .await;

            if let Some(file_cache) = &self.file_cache {
                if let Err(e) = file_cache
                    .put(cache_key, response.clone(), token_usage)
                    .await
                {
                    warn!("Failed to store in file cache: {}", e);
                }
            }
        }

        Ok(response)
    }

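    /// Simulates an API call with artificial latency; no network request is
    /// made, so the example runs without a real API key.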
    async fn make_api_call(&self, params: &ChatCompletionParams) -> Result<String> {
        // Simulate network latency.
        sleep(Duration::from_millis(500)).await;

        let response = match params.messages.first() {
            Some(msg) if msg.content.contains("error") => {
                return Err(Error::InvalidRequest("Simulated API error".to_string()));
            }
            Some(msg) => {
                format!(
                    "Response to: {}",
                    msg.content.chars().take(50).collect::<String>()
                )
            }
            None => "Empty response".to_string(),
        };

        Ok(response)
    }

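    /// Estimates token usage with a rough ~4-characters-per-token heuristic
    /// and approximate per-model pricing.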
    fn estimate_token_usage(
        &self,
        params: &ChatCompletionParams,
        response: &str,
    ) -> TokenUsageInfo {
        let prompt_text: String = params
            .messages
            .iter()
            .map(|m| m.content.clone())
            .collect::<Vec<_>>()
            .join(" ");

        // Rough heuristic: ~4 characters per token.
        let prompt_tokens = (prompt_text.len() / 4) as i32;
        let completion_tokens = (response.len() / 4) as i32;
        let total_tokens = prompt_tokens + completion_tokens;

        // Approximate per-1K-token pricing by model.
        let cost_per_1k_tokens = match params.model.as_str() {
            "gpt-4" => 0.03,
            "gpt-3.5-turbo" => 0.002,
            _ => 0.002,
        };

        let estimated_cost_usd = (total_tokens as f64 / 1000.0) * cost_per_1k_tokens;

        TokenUsageInfo {
            prompt_tokens,
            completion_tokens,
            total_tokens,
            estimated_cost_usd,
        }
    }

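    /// Decides whether a response is worth caching: everything when
    /// `cache_all` is set; otherwise deterministic (temperature 0.0) or
    /// sufficiently expensive responses, subject to a minimum length.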
    fn should_cache(
        &self,
        params: &ChatCompletionParams,
        response: &str,
        token_usage: &TokenUsageInfo,
    ) -> bool {
        if self.cache_strategy.cache_all {
            return true;
        }

        if response.len() < self.cache_strategy.min_response_length {
            return false;
        }

        if self.cache_strategy.cache_deterministic {
            if let Some(temp) = params.temperature {
                if temp == 0.0 {
                    return true;
                }
            }
        }

        if self.cache_strategy.cache_expensive {
            if token_usage.estimated_cost_usd >= self.cache_strategy.min_cost_threshold {
                return true;
            }
        }

        false
    }

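    /// Pre-populates the cache by issuing requests for a list of common
    /// prompts (only when `enable_warming` is set).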
    async fn warm_cache(&self, common_prompts: Vec<ChatCompletionParams>) -> Result<()> {
        if !self.cache_strategy.enable_warming {
            return Ok(());
        }

        info!(
            "Starting cache warming with {} prompts",
            common_prompts.len()
        );

        for (i, params) in common_prompts.iter().enumerate() {
            info!("Warming cache {}/{}", i + 1, common_prompts.len());

            match self.chat_completion(params.clone(), None).await {
                Ok(_) => debug!("Cache warmed for prompt {}", i + 1),
                Err(e) => warn!("Failed to warm cache for prompt {}: {}", i + 1, e),
            }

            // Small delay between warming requests.
            sleep(Duration::from_millis(100)).await;
        }

        info!("Cache warming completed");
        Ok(())
    }

    fn get_cache_stats(&self) -> (CacheStats, Option<CacheStats>) {
        let memory_stats = self.memory_cache.get_stats();
        let file_stats = self.file_cache.as_ref().map(|cache| cache.get_stats());

        (memory_stats, file_stats)
    }

    async fn cleanup_expired(&self) -> Result<()> {
        self.memory_cache.cleanup_expired().await;

        if let Some(file_cache) = &self.file_cache {
            file_cache.cleanup_expired().await?;
        }

        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    // Initialize logging; default to `info` if RUST_LOG is unset.
    tracing_subscriber::fmt()
        .with_env_filter(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
        )
        .init();

    info!("Starting caching strategies example");

    // The API key is a placeholder: make_api_call simulates responses locally.
    let config = Config::builder().api_key("test-api-key").build();
    let client = Client::builder(config)?.build();

    info!("=== Example 1: Memory Caching ===");

    let cache_dir = std::env::temp_dir().join("openai_cache");
    let caching_client =
        CachingClient::new(client, Some(&cache_dir))?.with_strategy(CacheStrategy {
            cache_all: true,
            min_response_length: 10,
            cache_deterministic: true,
            cache_expensive: true,
            min_cost_threshold: 0.0,
            enable_warming: false,
        });

    let test_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![
            ChatMessage::system("You are a helpful assistant."),
            ChatMessage::user("What is the capital of France?"),
        ],
        // Temperature 0.0 keeps the request deterministic and cacheable.
        temperature: Some(0.0),
        max_tokens: Some(100),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    info!("Making first request (should be cache miss)");
    let start_time = Instant::now();
    let response1 = caching_client
        .chat_completion(test_params.clone(), None)
        .await?;
    let first_duration = start_time.elapsed();
    info!("First response: {} (took {:?})", response1, first_duration);

    info!("Making second request (should be cache hit)");
    let start_time = Instant::now();
    let response2 = caching_client
        .chat_completion(test_params.clone(), None)
        .await?;
    let second_duration = start_time.elapsed();
    info!(
        "Second response: {} (took {:?})",
        response2, second_duration
    );

    assert_eq!(response1, response2);
    info!(
        "Cache working: responses identical, second request {:?} faster",
        first_duration.saturating_sub(second_duration)
    );

    info!("\n=== Example 2: User-Specific Caching ===");

    let user_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![ChatMessage::user(
            "What is my favorite programming language?",
        )],
        temperature: Some(0.7),
        max_tokens: Some(50),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    let user1_response = caching_client
        .chat_completion(user_params.clone(), Some("user1".to_string()))
        .await?;
    let user2_response = caching_client
        .chat_completion(user_params.clone(), Some("user2".to_string()))
        .await?;

    info!("User 1 response: {}", user1_response);
    info!("User 2 response: {}", user2_response);

    // The same user and params should now hit the user-scoped cache entry.
    let user1_cached = caching_client
        .chat_completion(user_params, Some("user1".to_string()))
        .await?;
    assert_eq!(user1_response, user1_cached);
    info!("User-specific caching working");

    info!("\n=== Example 3: Conditional Caching ===");

    let conditional_client = CachingClient::new(
        Client::builder(Config::builder().api_key("test-api-key").build())?.build(),
        Some(&cache_dir),
    )?
    .with_strategy(CacheStrategy {
        cache_all: false,
        min_response_length: 20,
        cache_deterministic: true,
        cache_expensive: true,
        min_cost_threshold: 0.001,
        enable_warming: false,
    });

    let deterministic_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![ChatMessage::user("Count from 1 to 5")],
        // Deterministic request: always cached under this strategy.
        temperature: Some(0.0),
        max_tokens: Some(50),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    info!("Testing deterministic request (should cache)");
    conditional_client
        .chat_completion(deterministic_params.clone(), None)
        .await?;
    conditional_client
        .chat_completion(deterministic_params, None)
        .await?;

    let creative_params = ChatCompletionParams {
        model: "gpt-3.5-turbo".to_string(),
        messages: vec![ChatMessage::user("Write a creative story")],
        // High temperature: cached only if the estimated cost crosses the threshold.
        temperature: Some(1.0),
        max_tokens: Some(50),
        top_p: None,
        frequency_penalty: None,
        presence_penalty: None,
    };

    info!("Testing creative request (might not cache)");
    conditional_client
        .chat_completion(creative_params.clone(), None)
        .await?;
    conditional_client
        .chat_completion(creative_params, None)
        .await?;

    info!("\n=== Example 4: Cache Warming ===");

    let warming_client = CachingClient::new(
        Client::builder(Config::builder().api_key("test-api-key").build())?.build(),
        Some(&cache_dir),
    )?
    .with_strategy(CacheStrategy {
        cache_all: true,
        enable_warming: true,
        ..Default::default()
    });

    let common_prompts = vec![
        ChatCompletionParams {
            model: "gpt-3.5-turbo".to_string(),
            messages: vec![ChatMessage::user("What is machine learning?")],
            temperature: Some(0.0),
            max_tokens: Some(100),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
        },
        ChatCompletionParams {
            model: "gpt-3.5-turbo".to_string(),
            messages: vec![ChatMessage::user("Explain quantum computing")],
            temperature: Some(0.0),
            max_tokens: Some(100),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
        },
        ChatCompletionParams {
            model: "gpt-3.5-turbo".to_string(),
            messages: vec![ChatMessage::user("What is blockchain?")],
            temperature: Some(0.0),
            max_tokens: Some(100),
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
        },
    ];

    warming_client.warm_cache(common_prompts.clone()).await?;

    // Every warmed prompt should now be served from the cache.
    info!("Testing warmed cache entries");
    for (i, params) in common_prompts.iter().enumerate() {
        let start_time = Instant::now();
        let response = warming_client.chat_completion(params.clone(), None).await?;
        let duration = start_time.elapsed();
        info!(
            "Warmed entry {} retrieved in {:?}: {}",
            i + 1,
            duration,
            response
        );
    }

    info!("\n=== Example 5: Cache Analytics ===");

    let (memory_stats, file_stats) = caching_client.get_cache_stats();

    info!("Memory cache statistics:");
    memory_stats.print_stats();

    if let Some(file_stats) = file_stats {
        info!("\nFile cache statistics:");
        file_stats.print_stats();
    }

    info!("\n=== Example 6: Cache Maintenance ===");

    info!("Performing cache cleanup");
    caching_client.cleanup_expired().await?;

    let (memory_stats_after, file_stats_after) = caching_client.get_cache_stats();
    info!("Statistics after cleanup:");
    memory_stats_after.print_stats();

    if let Some(ref file_stats_after) = file_stats_after {
        info!("\nFile cache after cleanup:");
        file_stats_after.print_stats();
    }

    info!("\n=== Example 7: Cost Analysis ===");

    let total_cost_savings = memory_stats_after.cost_savings_usd
        + file_stats_after
            .as_ref()
            .map(|s| s.cost_savings_usd)
            .unwrap_or(0.0);

    let total_time_savings = memory_stats_after.time_savings_ms
        + file_stats_after
            .as_ref()
            .map(|s| s.time_savings_ms)
            .unwrap_or(0);

    info!("=== Overall Cache Performance ===");
    info!("Total cost savings: ${:.4}", total_cost_savings);
    info!("Total time savings: {}ms", total_time_savings);
    info!("Cache efficiency: see the hit-rate and savings figures above");

    info!("Cleaning up cache directory");
    if cache_dir.exists() {
        fs::remove_dir_all(&cache_dir)
            .map_err(|e| Error::InvalidRequest(format!("Cleanup failed: {}", e)))?;
    }

    info!("Caching strategies example completed successfully!");
    Ok(())
}