Skip to main content

kelora/rhai_functions/
hashing.rs

//! Hashing and pseudonymization helpers for Rhai scripts.
//!
//! Includes fast hashes, cryptographic digests, and salted pseudonyms.
4
5use argon2::password_hash::{Salt, SaltString};
6use argon2::{Argon2, PasswordHasher};
7use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
8use hkdf::Hkdf;
9use hmac::{Hmac, Mac};
10use once_cell::sync::Lazy;
11use rhai::Engine;
12use sha2::{Digest, Sha256};
13use std::collections::HashMap;
14use std::sync::{Mutex, RwLock};
15use xxhash_rust::xxh3::xxh3_64;
16
17type HmacSha256 = Hmac<Sha256>;
18
/// Settings that control how the hashing helpers report diagnostics.
///
/// The derived `Default` yields `verbose == 0` and `use_emoji == false`.
#[derive(Debug, Clone, Default)]
pub struct HashingRuntimeConfig {
    /// Verbosity level; pseudonym initialization is logged at level 2 or higher.
    pub verbose: u8,
    /// When `true`, diagnostic lines are prefixed with an emoji instead of `kelora:`.
    pub use_emoji: bool,
}
25
26static RUNTIME_CONFIG: Lazy<RwLock<HashingRuntimeConfig>> =
27    Lazy::new(|| RwLock::new(HashingRuntimeConfig::default()));
28
29/// Set runtime configuration for hashing functions
30pub fn set_runtime_config(config: HashingRuntimeConfig) {
31    let mut guard = RUNTIME_CONFIG
32        .write()
33        .expect("hashing runtime config poisoned");
34    *guard = config;
35}
36
37/// Log pseudonym initialization (only on verbose level 2+)
38fn log_pseudonym_init(message: &str) {
39    let config = RUNTIME_CONFIG
40        .read()
41        .expect("hashing runtime config poisoned");
42    if config.verbose >= 2 {
43        let prefix = if config.use_emoji { "🔹" } else { "kelora:" };
44        eprintln!("{} {}", prefix, message);
45    }
46}
47
48/// Master key for pseudonymization (derived once at startup)
49static MASTER_KEY: Lazy<MasterKeyState> = Lazy::new(|| {
50    match std::env::var("KELORA_SECRET") {
51        Ok(secret) if !secret.is_empty() => {
52            match derive_master_key_from_secret(&secret) {
53                Ok(key) => {
54                    log_pseudonym_init("pseudonym: ON (stable; KELORA_SECRET)");
55                    MasterKeyState::Stable(key)
56                }
57                Err(e) => {
58                    // Always show fatal errors
59                    eprintln!("kelora: pseudonym init failed: {}", e);
60                    std::process::exit(1);
61                }
62            }
63        }
64        Ok(_) => {
65            // Always show fatal errors
66            eprintln!("kelora: KELORA_SECRET must not be empty");
67            std::process::exit(1);
68        }
69        Err(_) => {
70            // Generate ephemeral key
71            let mut key = [0u8; 32];
72            for byte in &mut key {
73                *byte = fastrand::u8(..);
74            }
75            log_pseudonym_init("pseudonym: ON (ephemeral; not stable)");
76            MasterKeyState::Ephemeral(key)
77        }
78    }
79});
80
81/// Domain-specific derived keys (cached)
82static DOMAIN_KEYS: Lazy<Mutex<HashMap<String, [u8; 32]>>> =
83    Lazy::new(|| Mutex::new(HashMap::new()));
84
/// A 32-byte master key tagged with where it came from.
enum MasterKeyState {
    /// Key derived from a configured secret; identical across runs.
    Stable([u8; 32]),
    /// Randomly generated key; differs from run to run.
    Ephemeral([u8; 32]),
}

impl MasterKeyState {
    /// Borrow the raw key bytes, regardless of the key's origin.
    fn as_bytes(&self) -> &[u8; 32] {
        // Both variants carry the same payload type, so one irrefutable
        // or-pattern covers them.
        let (Self::Stable(bytes) | Self::Ephemeral(bytes)) = self;
        bytes
    }
}
98
99/// Derive master key from secret using Argon2id
100fn derive_master_key_from_secret(secret: &str) -> Result<[u8; 32], String> {
101    let argon2 = Argon2::new(
102        argon2::Algorithm::Argon2id,
103        argon2::Version::V0x13,
104        argon2::Params::new(
105            64 * 1024, // 64 MiB
106            3,         // iterations
107            1,         // parallelism
108            Some(32),  // output length
109        )
110        .map_err(|e| format!("Argon2 params error: {}", e))?,
111    );
112
113    // Use fixed salt "kelora:v1:master"
114    let salt = SaltString::encode_b64(b"kelora:v1:master")
115        .map_err(|e| format!("Salt encoding error: {}", e))?;
116
117    let hash = argon2
118        .hash_password(secret.as_bytes(), Salt::try_from(salt.as_str()).unwrap())
119        .map_err(|e| format!("Argon2 hashing error: {}", e))?;
120
121    let hash_bytes = hash
122        .hash
123        .ok_or_else(|| "Argon2 produced no hash".to_string())?;
124
125    let mut key = [0u8; 32];
126    key.copy_from_slice(hash_bytes.as_bytes());
127    Ok(key)
128}
129
130/// Derive domain-specific key using HKDF-SHA256
131fn derive_domain_key(domain: &str) -> Result<[u8; 32], String> {
132    // Check cache first
133    {
134        let cache = DOMAIN_KEYS.lock().unwrap();
135        if let Some(key) = cache.get(domain) {
136            return Ok(*key);
137        }
138    }
139
140    let master = MASTER_KEY.as_bytes();
141    let info = format!("kelora:v1:{}", domain);
142
143    let hkdf = Hkdf::<Sha256>::new(None, master);
144    let mut okm = [0u8; 32];
145    hkdf.expand(info.as_bytes(), &mut okm)
146        .map_err(|e| format!("HKDF expansion error: {}", e))?;
147
148    // Cache the derived key
149    {
150        let mut cache = DOMAIN_KEYS.lock().unwrap();
151        cache.insert(domain.to_string(), okm);
152    }
153
154    Ok(okm)
155}
156
157/// Generate pseudonym token using HMAC-SHA256
158fn pseudonym_impl(value: &str, domain: &str) -> Result<String, Box<rhai::EvalAltResult>> {
159    if domain.is_empty() {
160        return Err("pseudonym: domain must be non-empty".into());
161    }
162
163    // Force initialization of master key (triggers logging)
164    let _ = MASTER_KEY.as_bytes();
165
166    let domain_key = derive_domain_key(domain)
167        .map_err(|e| format!("pseudonym: domain key derivation failed: {}", e))?;
168
169    // HMAC-SHA256(key=domain_key, data=domain || value)
170    let mut mac =
171        HmacSha256::new_from_slice(&domain_key).map_err(|e| format!("HMAC init error: {}", e))?;
172
173    mac.update(domain.as_bytes());
174    mac.update(value.as_bytes());
175
176    let result = mac.finalize();
177    let tag = result.into_bytes();
178
179    // base64url encode (unpadded) and take first 24 chars
180    let encoded = URL_SAFE_NO_PAD.encode(tag);
181    Ok(encoded[..24].to_string())
182}
183
184/// Fast non-cryptographic hash for bucketing/sampling
185/// Uses xxh3_64 for performance
186fn bucket_impl(value: &str) -> i64 {
187    xxh3_64(value.as_bytes()) as i64
188}
189
190/// Apply a named hash algorithm to input
191/// Supported: "sha256" (default), "xxh3"
192fn hash_impl(value: &str, algo: &str) -> Result<String, Box<rhai::EvalAltResult>> {
193    let algo_lower = algo.to_lowercase();
194    match algo_lower.as_str() {
195        "sha256" => {
196            let mut hasher = Sha256::new();
197            hasher.update(value.as_bytes());
198            Ok(hex::encode(hasher.finalize()))
199        }
200        "xxh3" => {
201            let hash = xxh3_64(value.as_bytes());
202            Ok(format!("{:016x}", hash))
203        }
204        _ => Err(format!("Unknown hash algorithm '{}'. Supported: sha256, xxh3", algo).into()),
205    }
206}
207
208/// Wrapper for hash with default algorithm
209fn hash_default_impl(value: &str) -> Result<String, Box<rhai::EvalAltResult>> {
210    hash_impl(value, "sha256")
211}
212
213/// Register hashing functions with the Rhai engine
214pub fn register_functions(engine: &mut Engine) {
215    // bucket() - fast non-cryptographic hash for bucketing/sampling
216    engine.register_fn("bucket", bucket_impl);
217
218    // hash() - multi-algorithm hashing
219    engine.register_fn("hash", hash_default_impl);
220    engine.register_fn("hash", hash_impl);
221
222    // pseudonym() - domain-separated pseudonymization
223    engine.register_fn("pseudonym", pseudonym_impl);
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Hex-encoded SHA-256 digest of the string "hello".
    const HELLO_SHA256: &str = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";

    /// Domain shared by most pseudonym tests.
    const EMAIL_DOMAIN: &str = "kelora:v1:email";

    /// True for characters of the unpadded base64url alphabet.
    fn is_base64url_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '-' || c == '_'
    }

    #[test]
    fn test_bucket() {
        let first = bucket_impl("test");
        let repeat = bucket_impl("test");
        let other = bucket_impl("other");

        // Equal inputs hash equally; distinct inputs should (probably) differ.
        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }

    #[test]
    fn test_hash_sha256() {
        let digest = hash_impl("hello", "sha256").unwrap();
        assert_eq!(digest, HELLO_SHA256);
    }

    #[test]
    fn test_hash_xxh3() {
        // Only the shape is checked: 16 hexadecimal characters.
        let digest = hash_impl("hello", "xxh3").unwrap();
        assert_eq!(digest.len(), 16);
        assert!(digest.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_hash_unknown_algo() {
        assert!(hash_impl("hello", "unknown").is_err());
    }

    #[test]
    fn test_hash_default() {
        // The one-argument form must behave like SHA-256.
        let digest = hash_default_impl("hello").unwrap();
        assert_eq!(digest, HELLO_SHA256);
    }

    #[test]
    fn test_pseudonym_empty_domain() {
        let outcome = pseudonym_impl("value", "");
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("domain must be non-empty"));
    }

    #[test]
    fn test_pseudonym_deterministic() {
        // Identical value and domain must map to the identical token.
        let first = pseudonym_impl("user123", EMAIL_DOMAIN).unwrap();
        let second = pseudonym_impl("user123", EMAIL_DOMAIN).unwrap();
        assert_eq!(first, second);
        assert_eq!(first.len(), 24);
    }

    #[test]
    fn test_pseudonym_domain_separation() {
        // One value under two domains must yield two different tokens.
        let email_token = pseudonym_impl("user123", EMAIL_DOMAIN).unwrap();
        let ip_token = pseudonym_impl("user123", "kelora:v1:ip").unwrap();
        assert_ne!(email_token, ip_token);
    }

    #[test]
    fn test_pseudonym_different_values() {
        // Two values under one domain must yield two different tokens.
        let token_a = pseudonym_impl("user123", EMAIL_DOMAIN).unwrap();
        let token_b = pseudonym_impl("user456", EMAIL_DOMAIN).unwrap();
        assert_ne!(token_a, token_b);
    }

    #[test]
    fn test_pseudonym_output_format() {
        let token = pseudonym_impl("test", "kelora:v1:test").unwrap();
        // Exactly 24 characters, base64url alphabet only, no padding.
        assert_eq!(token.len(), 24);
        assert!(token.chars().all(is_base64url_char));
        assert!(!token.contains('='));
    }

    #[test]
    fn test_rhai_integration() {
        let mut engine = rhai::Engine::new();
        register_functions(&mut engine);

        // bucket() matches the native implementation.
        let bucket: i64 = engine.eval(r#"bucket("test")"#).unwrap();
        assert_eq!(bucket, bucket_impl("test"));

        // hash() with the default algorithm.
        let sha: String = engine.eval(r#"hash("hello")"#).unwrap();
        assert_eq!(sha, HELLO_SHA256);

        // hash() with an explicit algorithm.
        let xxh: String = engine.eval(r#"hash("hello", "xxh3")"#).unwrap();
        assert_eq!(xxh.len(), 16);

        // pseudonym() with a valid domain.
        let token: String = engine
            .eval(r#"pseudonym("user123", "kelora:v1:email")"#)
            .unwrap();
        assert_eq!(token.len(), 24);

        // pseudonym() with an empty domain is rejected.
        let rejected = engine.eval::<String>(r#"pseudonym("user123", "")"#);
        assert!(rejected.is_err());
    }
}