// atomr_agents_cache/lib.rs
//! LLM cache.
//!
//! `LlmCache` is the trait. Stock backends:
//! - `InMemoryLlmCache` (always available)
//! - `SemanticLlmCache<E>` — embeds the prompt; returns a cached
//!   answer if a previous prompt was within `threshold` cosine
//!   distance. Useful for "near-duplicate" cache hits.
//! - `SqliteLlmCache` (feature `sqlite`) and `RedisLlmCache`
//!   (feature `redis`): backend stubs whose real wire-up lives in a
//!   deployment patch.

mod inmem;
mod redis;
mod semantic;
mod sqlite;

pub use inmem::InMemoryLlmCache;
pub use semantic::SemanticLlmCache;

#[cfg(feature = "redis")]
pub use redis::RedisLlmCache;
#[cfg(feature = "sqlite")]
pub use sqlite::SqliteLlmCache;

use async_trait::async_trait;
use atomr_agents_core::Result;
use atomr_infer_core::tokens::{FinishReason, TokenUsage};
use serde::{Deserialize, Serialize};
use std::hash::{Hash, Hasher};
31#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
32pub struct CacheKey {
33    pub model: String,
34    pub messages_hash: u64,
35    pub sampling_hash: u64,
36}
37
38impl CacheKey {
39    pub fn from_batch(batch: &atomr_infer_core::batch::ExecuteBatch) -> Self {
40        let mut mh = std::collections::hash_map::DefaultHasher::new();
41        for m in &batch.messages {
42            (m.role as u8).hash(&mut mh);
43            let s = serde_json::to_string(&m.content).unwrap_or_default();
44            s.hash(&mut mh);
45        }
46        let messages_hash = mh.finish();
47        let mut sh = std::collections::hash_map::DefaultHasher::new();
48        let sampling = serde_json::to_string(&batch.sampling).unwrap_or_default();
49        sampling.hash(&mut sh);
50        let sampling_hash = sh.finish();
51        Self {
52            model: batch.model.clone(),
53            messages_hash,
54            sampling_hash,
55        }
56    }
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct CachedTurn {
61    pub text: String,
62    pub usage: TokenUsage,
63    pub finish_reason: Option<FinishReason>,
64}
65
66#[async_trait]
67pub trait LlmCache: Send + Sync + 'static {
68    async fn get(&self, key: &CacheKey) -> Result<Option<CachedTurn>>;
69    async fn put(&self, key: CacheKey, value: CachedTurn) -> Result<()>;
70}