Skip to main content

somatize_core/
cache.rs

//! Content-addressable caching — keys, traits, and metadata.
//!
//! [`CacheKey`] is a SHA-256 hash of computation inputs. Two kinds of key are
//! derived from it:
//! - **State key**: `hash(config + training_data)` — for `fit()` results
//! - **Output key**: `hash(config + state + input)` — for `forward()` results
//!
//! [`CacheStore`] is the K/V interface; implementations live in soma-runtime.
8
9use crate::error::Result;
10use crate::value::Value;
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13use sha2::{Digest, Sha256};
14use std::fmt;
15
16/// Content-addressable hash identifying a computation.
17#[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
18pub struct CacheKey(pub [u8; 32]);
19
20impl CacheKey {
21    /// Create a cache key by hashing arbitrary byte slices.
22    pub fn from_parts(parts: &[&[u8]]) -> Self {
23        let mut hasher = Sha256::new();
24        for part in parts {
25            // Length-prefix each part to avoid collisions between
26            // concat("ab", "c") and concat("a", "bc")
27            hasher.update((part.len() as u64).to_le_bytes());
28            hasher.update(part);
29        }
30        Self(hasher.finalize().into())
31    }
32
33    /// Create a cache key for a filter's trained state.
34    /// key = hash(filter_config_hash + training_data_hash)
35    pub fn for_state(config_hash: &CacheKey, data_hash: &CacheKey) -> Self {
36        Self::from_parts(&[&config_hash.0, &data_hash.0])
37    }
38
39    /// Create a cache key for a filter's output.
40    /// key = hash(filter_config_hash + state_hash + input_data_hash)
41    pub fn for_output(
42        config_hash: &CacheKey,
43        state_hash: &CacheKey,
44        input_hash: &CacheKey,
45    ) -> Self {
46        Self::from_parts(&[&config_hash.0, &state_hash.0, &input_hash.0])
47    }
48
49    /// Hash arbitrary serializable data.
50    pub fn hash_data(data: &[u8]) -> Self {
51        Self::from_parts(&[data])
52    }
53
54    /// Returns the hex representation.
55    pub fn to_hex(&self) -> String {
56        self.0.iter().map(|b| format!("{b:02x}")).collect()
57    }
58}
59
60impl fmt::Debug for CacheKey {
61    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
62        write!(f, "CacheKey({}...)", &self.to_hex()[..12])
63    }
64}
65
66impl fmt::Display for CacheKey {
67    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68        write!(f, "{}", &self.to_hex()[..16])
69    }
70}
71
72/// Which storage tier a cached entry lives in.
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74pub enum CacheTier {
75    Memory,
76    Local,
77    Remote,
78}
79
80/// Where a cached value originated.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub enum Origin {
83    Computed {
84        node_id: String,
85        run_id: String,
86    },
87    Ingested {
88        source: String,
89    },
90    Streamed {
91        window_start: DateTime<Utc>,
92        window_end: DateTime<Utc>,
93    },
94}
95
96/// Metadata about a cached entry, queryable without loading the value.
97#[derive(Debug, Clone, Serialize, Deserialize)]
98pub struct EntryMeta {
99    pub key: CacheKey,
100    pub size_bytes: u64,
101    pub created_at: DateTime<Utc>,
102    pub last_accessed: DateTime<Utc>,
103    pub ttl: Option<std::time::Duration>,
104    pub origin: Origin,
105}
106
107/// The K/V cache store interface.
108///
109/// Implementations may be in-memory, on-disk (RocksDB/sled),
110/// or remote (S3). The tiered cache composes multiple stores.
111pub trait CacheStore: Send + Sync {
112    fn get(&self, key: &CacheKey) -> Result<Option<Value>>;
113    fn put(&self, key: &CacheKey, value: &Value) -> Result<()>;
114    fn exists(&self, key: &CacheKey) -> Result<bool>;
115    fn remove(&self, key: &CacheKey) -> Result<()>;
116    fn metadata(&self, key: &CacheKey) -> Result<Option<EntryMeta>>;
117}
118
#[cfg(test)]
mod tests {
    //! Unit tests for `CacheKey` construction, rendering and serialization.

    use super::*;

    #[test]
    fn cache_key_deterministic() {
        let k1 = CacheKey::from_parts(&[b"hello", b"world"]);
        let k2 = CacheKey::from_parts(&[b"hello", b"world"]);
        assert_eq!(k1, k2);
    }

    #[test]
    fn cache_key_sensitive_to_content() {
        let k1 = CacheKey::from_parts(&[b"hello", b"world"]);
        let k2 = CacheKey::from_parts(&[b"hello", b"world!"]);
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_sensitive_to_part_boundaries() {
        // "ab" + "c" must differ from "a" + "bc"
        let k1 = CacheKey::from_parts(&[b"ab", b"c"]);
        let k2 = CacheKey::from_parts(&[b"a", b"bc"]);
        assert_ne!(k1, k2);
    }

    #[test]
    fn cache_key_empty_part_differs_from_no_parts() {
        // An empty part still contributes its length prefix to the hash.
        let no_parts = CacheKey::from_parts(&[]);
        let one_empty_part = CacheKey::from_parts(&[b""]);
        assert_ne!(no_parts, one_empty_part);
    }

    #[test]
    fn cache_key_hash_data_is_single_part() {
        assert_eq!(
            CacheKey::hash_data(b"payload"),
            CacheKey::from_parts(&[b"payload"])
        );
    }

    #[test]
    fn cache_key_for_state() {
        let config = CacheKey::hash_data(b"scaler_config");
        let data = CacheKey::hash_data(b"training_data");
        let state_key = CacheKey::for_state(&config, &data);

        // Same inputs → same key
        let state_key2 = CacheKey::for_state(&config, &data);
        assert_eq!(state_key, state_key2);

        // Different data → different key
        let data2 = CacheKey::hash_data(b"different_data");
        let state_key3 = CacheKey::for_state(&config, &data2);
        assert_ne!(state_key, state_key3);

        // Argument order matters: config and data are not interchangeable
        let swapped = CacheKey::for_state(&data, &config);
        assert_ne!(state_key, swapped);
    }

    #[test]
    fn cache_key_for_output() {
        let config = CacheKey::hash_data(b"config");
        let state = CacheKey::hash_data(b"state");
        let input = CacheKey::hash_data(b"input");
        let key = CacheKey::for_output(&config, &state, &input);

        // Same inputs → same key
        assert_eq!(key, CacheKey::for_output(&config, &state, &input));

        // Different config → different key
        let config2 = CacheKey::hash_data(b"config2");
        assert_ne!(key, CacheKey::for_output(&config2, &state, &input));

        // Different state → different key
        let state2 = CacheKey::hash_data(b"state2");
        assert_ne!(key, CacheKey::for_output(&config, &state2, &input));

        // Different input → different key
        let input2 = CacheKey::hash_data(b"input2");
        assert_ne!(key, CacheKey::for_output(&config, &state, &input2));
    }

    #[test]
    fn cache_key_hex_and_display() {
        let key = CacheKey::hash_data(b"test");
        let hex = key.to_hex();
        assert_eq!(hex.len(), 64); // 32 bytes = 64 hex chars
        assert!(hex.bytes().all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')));

        // Display is the first 16 hex digits of the full rendering
        let display = format!("{key}");
        assert_eq!(display, &hex[..16]);

        // Debug is the first 12 hex digits wrapped in `CacheKey(...)`
        let debug = format!("{key:?}");
        assert_eq!(debug, format!("CacheKey({}...)", &hex[..12]));
    }

    #[test]
    fn cache_key_hex_nibble_order_and_padding() {
        // Independent of SHA-256: render a hand-built digest
        let mut raw = [0u8; 32];
        raw[0] = 0x0f;
        raw[31] = 0xf0;
        let hex = CacheKey(raw).to_hex();
        assert_eq!(hex.len(), 64);
        assert!(hex.starts_with("0f00"));
        assert!(hex.ends_with("00f0"));

        assert_eq!(CacheKey([0xab; 32]).to_hex(), "ab".repeat(32));
    }

    #[test]
    fn cache_key_serde_roundtrip() {
        let key = CacheKey::hash_data(b"test_data");
        let json = serde_json::to_string(&key).unwrap();
        let deserialized: CacheKey = serde_json::from_str(&json).unwrap();
        assert_eq!(key, deserialized);
    }
}