reflex/hashing/
mod.rs

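//! Hash utilities for cache keys and identifiers.
//!
//! All hashes are computed with BLAKE3. Use the full 32-byte digests for exact-match keys and
//! the truncated 64-bit hashes for compact ids; the 64-bit forms are not suitable for security.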
4
5use blake3::Hasher;
6
7/// Hashes a prompt to a 32-byte BLAKE3 digest.
8#[inline]
9pub fn hash_prompt(prompt: &str) -> [u8; 32] {
10    *blake3::hash(prompt.as_bytes()).as_bytes()
11}
12
13/// Computes a 64-bit hash of the input data using BLAKE3, truncated from 256 bits.
14///
15/// Use this for compact ids (tenant/context). Not suitable for security.
16#[inline]
17pub fn hash_to_u64(data: &[u8]) -> u64 {
18    let hash = blake3::hash(data);
19    let bytes: [u8; 8] = hash.as_bytes()[0..8]
20        .try_into()
21        .expect("BLAKE3 always produces at least 8 bytes");
22    u64::from_le_bytes(bytes)
23}
24
25/// Hashes a `(role, plan)` pair into a compact 64-bit context hash.
26#[inline]
27pub fn hash_context(role: &str, plan: &str) -> u64 {
28    let mut hasher = Hasher::new();
29    hasher.update(role.as_bytes());
30    hasher.update(b"|");
31    hasher.update(plan.as_bytes());
32
33    let hash = hasher.finalize();
34    let bytes: [u8; 8] = hash.as_bytes()[0..8]
35        .try_into()
36        .expect("BLAKE3 always produces at least 8 bytes");
37    u64::from_le_bytes(bytes)
38}
39
40/// Hashes a tenant identifier string to a compact 64-bit id.
41#[inline]
42pub fn hash_tenant_id(tenant: &str) -> u64 {
43    hash_to_u64(tenant.as_bytes())
44}
45
46/// Hashes cache content to a 32-byte digest (tenant + context + embedding + payload).
47#[inline]
48pub fn hash_cache_content(
49    tenant_id: u64,
50    context_hash: u64,
51    embedding: &[u8],
52    payload: &[u8],
53) -> [u8; 32] {
54    let mut hasher = Hasher::new();
55    hasher.update(&tenant_id.to_le_bytes());
56    hasher.update(&context_hash.to_le_bytes());
57    hasher.update(embedding);
58    hasher.update(payload);
59    *hasher.finalize().as_bytes()
60}
61
/// Unit tests for the hashing helpers: determinism, input sensitivity, output
/// sizes, and a coarse smoke test that hashing in a loop completes.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Hashing the same prompt repeatedly yields the same digest.
    #[test]
    fn test_hash_prompt_determinism() {
        let prompt = "What is the capital of France?";

        let hash1 = hash_prompt(prompt);
        let hash2 = hash_prompt(prompt);
        let hash3 = hash_prompt(prompt);

        assert_eq!(hash1, hash2);
        assert_eq!(hash2, hash3);
    }

    /// Prompts differing in content, case, or trailing whitespace hash differently.
    #[test]
    fn test_hash_prompt_uniqueness() {
        let prompts = [
            "What is the capital of France?",
            "What is the capital of Germany?",
            "what is the capital of france?",
            "What is the capital of France? ",
        ];

        let hashes: Vec<_> = prompts.iter().map(|p| hash_prompt(p)).collect();
        let unique_hashes: HashSet<_> = hashes.iter().collect();

        assert_eq!(unique_hashes.len(), prompts.len());
    }

    /// The prompt digest is always 32 bytes long.
    #[test]
    fn test_hash_prompt_output_size() {
        let hash = hash_prompt("test");
        assert_eq!(hash.len(), 32);
    }

    /// The empty prompt still produces a non-zero digest.
    #[test]
    fn test_hash_prompt_empty_string() {
        let hash = hash_prompt("");
        assert!(!hash.iter().all(|&b| b == 0));
    }

    /// Prompts containing multi-byte UTF-8 characters are hashed like any other input.
    #[test]
    fn test_hash_prompt_unicode() {
        // Escapes (regional indicators F + R) keep the non-ASCII content intact
        // regardless of how the source file is re-encoded.
        let prompt = "Quelle est la capitale de la France? \u{1F1EB}\u{1F1F7}";
        // Guard the premise of this test: the input really is non-ASCII.
        assert!(!prompt.is_ascii());

        let hash = hash_prompt(prompt);
        assert_eq!(hash.len(), 32);

        let hash2 = hash_prompt("What is the capital of France?");
        assert_ne!(hash, hash2);
    }

    /// Hashing the same bytes repeatedly yields the same 64-bit value.
    #[test]
    fn test_hash_to_u64_determinism() {
        let data = b"test-tenant-id-12345";

        let hash1 = hash_to_u64(data);
        let hash2 = hash_to_u64(data);
        let hash3 = hash_to_u64(data);

        assert_eq!(hash1, hash2);
        assert_eq!(hash2, hash3);
    }

    /// Inputs differing in content, case, or trailing whitespace hash differently.
    #[test]
    fn test_hash_to_u64_uniqueness() {
        let inputs = [
            b"tenant-001".as_slice(),
            b"tenant-002".as_slice(),
            b"TENANT-001".as_slice(),
            b"tenant-001 ".as_slice(),
        ];

        let hashes: Vec<_> = inputs.iter().map(|i| hash_to_u64(i)).collect();
        let unique_hashes: HashSet<_> = hashes.iter().collect();

        assert_eq!(unique_hashes.len(), inputs.len());
    }

    /// Empty input is accepted and hashes deterministically.
    #[test]
    fn test_hash_to_u64_empty_input() {
        let hash = hash_to_u64(b"");
        let hash2 = hash_to_u64(b"");
        assert_eq!(hash, hash2);
    }

    /// The same `(role, plan)` pair always yields the same context hash.
    #[test]
    fn test_hash_context_determinism() {
        let hash1 = hash_context("admin", "enterprise");
        let hash2 = hash_context("admin", "enterprise");
        let hash3 = hash_context("admin", "enterprise");

        assert_eq!(hash1, hash2);
        assert_eq!(hash2, hash3);
    }

    /// Changing only the role changes the context hash.
    #[test]
    fn test_hash_context_role_sensitivity() {
        let admin_hash = hash_context("admin", "basic");
        let user_hash = hash_context("user", "basic");
        let guest_hash = hash_context("guest", "basic");

        assert_ne!(admin_hash, user_hash);
        assert_ne!(user_hash, guest_hash);
        assert_ne!(admin_hash, guest_hash);
    }

    /// Changing only the plan changes the context hash.
    #[test]
    fn test_hash_context_plan_sensitivity() {
        let free_hash = hash_context("user", "free");
        let basic_hash = hash_context("user", "basic");
        let premium_hash = hash_context("user", "premium");

        assert_ne!(free_hash, basic_hash);
        assert_ne!(basic_hash, premium_hash);
    }

    /// Moving characters across the role/plan boundary changes the hash.
    #[test]
    fn test_hash_context_separator_prevents_ambiguity() {
        let hash1 = hash_context("ab", "cd");
        let hash2 = hash_context("abc", "d");
        let hash3 = hash_context("a", "bcd");

        assert_ne!(hash1, hash2);
        assert_ne!(hash1, hash3);
        assert_ne!(hash2, hash3);
    }

    /// The same tenant string always maps to the same id.
    #[test]
    fn test_hash_tenant_id_consistency() {
        let tenant = "acme-corp-production";

        let id1 = hash_tenant_id(tenant);
        let id2 = hash_tenant_id(tenant);

        assert_eq!(id1, id2);
    }

    /// `hash_tenant_id` agrees with `hash_to_u64` over the tenant's bytes.
    #[test]
    fn test_hash_tenant_id_equals_hash_to_u64() {
        let tenant = "test-tenant";
        assert_eq!(hash_tenant_id(tenant), hash_to_u64(tenant.as_bytes()));
    }

    /// Identical cache content yields an identical digest.
    #[test]
    fn test_hash_cache_content_determinism() {
        let hash1 = hash_cache_content(100, 200, &[1, 2, 3], &[4, 5, 6]);
        let hash2 = hash_cache_content(100, 200, &[1, 2, 3], &[4, 5, 6]);

        assert_eq!(hash1, hash2);
    }

    /// Changing any single component changes the cache content digest.
    #[test]
    fn test_hash_cache_content_sensitivity() {
        let base = hash_cache_content(100, 200, &[1, 2, 3], &[4, 5, 6]);

        let changed_tenant = hash_cache_content(101, 200, &[1, 2, 3], &[4, 5, 6]);
        assert_ne!(base, changed_tenant);

        let changed_context = hash_cache_content(100, 201, &[1, 2, 3], &[4, 5, 6]);
        assert_ne!(base, changed_context);

        let changed_embedding = hash_cache_content(100, 200, &[1, 2, 4], &[4, 5, 6]);
        assert_ne!(base, changed_embedding);

        let changed_payload = hash_cache_content(100, 200, &[1, 2, 3], &[4, 5, 7]);
        assert_ne!(base, changed_payload);
    }

    /// The cache content digest is always 32 bytes long, even for empty inputs.
    #[test]
    fn test_hash_cache_content_output_size() {
        let hash = hash_cache_content(0, 0, &[], &[]);
        assert_eq!(hash.len(), 32);
    }

    /// Smoke test: hashing a typical prompt many times completes without panicking.
    #[test]
    fn test_hash_performance_sanity() {
        let prompt = "A moderately long prompt that represents typical user input for testing.";

        // `black_box` keeps the optimizer from eliding the hashing work.
        let prompt = std::hint::black_box(prompt);
        for _ in 0..10_000 {
            let _ = std::hint::black_box(hash_prompt(std::hint::black_box(prompt)));
        }
    }
}