openlatch-client 0.0.0

The open-source security layer for AI agents — client forwarder
Documentation
//! In-memory duplicate event detection store with a 100ms TTL.
//!
//! Deduplicates events by hashing `(session_id, tool_name, tool_input)`. The tool input
//! is hashed with object keys sorted at every level, so JSON key order does not matter.
//! Duplicate events within the TTL window return the same verdict but are NOT logged,
//! preventing log spam from repeated hook calls within a single agent operation.
//!
//! # Architecture
//!
//! Uses `DashMap` for concurrent access without a single global lock. The hash key is a
//! `u64` computed by SipHash (via `DefaultHasher`). SipHash is adequate here — this is an
//! internal dedup mechanism, not a security hash. Collisions within a 100ms window are
//! acceptable: a false-positive dedup causes a single event to be skipped, not a security
//! failure.
//!
//! # Performance
//!
//! PERFORMANCE: `DashMap` provides O(1) average-case reads/writes with no global lock.
//! The 100ms TTL window is small enough that the map stays tiny in practice, provided
//! expired entries are evicted periodically.
use dashmap::DashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::time::{Duration, Instant};

/// Default deduplication window per EVNT-06: a repeat of the same event seen less than
/// this long after it was recorded is treated as a duplicate.
const DEDUP_TTL: Duration = Duration::from_millis(100);

/// In-memory store for deduplicating events within a TTL window.
///
/// Each entry maps an event's dedup hash to the instant it was recorded. The store is
/// thread-safe via `DashMap`, so every method takes `&self` and the store can be shared
/// between threads without an outer lock.
#[derive(Debug)]
pub struct DedupStore {
    /// Dedup hash of an event → instant at which that event was last recorded as new.
    inner: DashMap<u64, Instant>,
    /// How long after being recorded an event keeps counting as a duplicate.
    ttl: Duration,
}

impl DedupStore {
    /// Create a new `DedupStore` with the default 100ms TTL.
    pub fn new() -> Self {
        Self {
            inner: DashMap::new(),
            ttl: DEDUP_TTL,
        }
    }

    /// Check if this event is a duplicate and insert it if it is not.
    ///
    /// Returns `true` if the event was seen within the TTL window (it IS a duplicate).
    /// Returns `false` if the event is new and inserts it for future dedup checks.
    ///
    /// The dedup key is: SHA-like hash of `(session_id, tool_name, canonical_json(tool_input))`.
    pub fn check_and_insert(
        &self,
        session_id: &str,
        tool_name: &str,
        tool_input: &serde_json::Value,
    ) -> bool {
        let key = self.compute_hash(session_id, tool_name, tool_input);
        let now = Instant::now();

        if let Some(entry) = self.inner.get(&key) {
            if now.duration_since(*entry.value()) < self.ttl {
                return true; // duplicate within TTL
            }
        }
        self.inner.insert(key, now);
        false
    }

    /// Compute a dedup key by hashing the event's identity fields.
    ///
    /// Uses `DefaultHasher` (SipHash) — fast and sufficient for collision avoidance
    /// within a 100ms window. This is NOT a security hash.
    ///
    /// Tool input is hashed directly from the JSON Value tree with keys sorted at each
    /// level, ensuring deterministic hashing regardless of JSON key order in the payload.
    /// This avoids the allocations of serializing to a canonical JSON string.
    fn compute_hash(
        &self,
        session_id: &str,
        tool_name: &str,
        tool_input: &serde_json::Value,
    ) -> u64 {
        let mut hasher = DefaultHasher::new();
        session_id.hash(&mut hasher);
        tool_name.hash(&mut hasher);
        hash_value(tool_input, &mut hasher);
        hasher.finish()
    }

    /// Evict all entries older than the TTL.
    ///
    /// Call periodically to prevent unbounded memory growth in long-running sessions.
    /// Under normal load (100ms TTL, <1000 events/session) the map stays very small
    /// and eviction is low-priority.
    pub fn evict_expired(&self) {
        let now = Instant::now();
        self.inner.retain(|_, v| now.duration_since(*v) < self.ttl);
    }
}

impl Default for DedupStore {
    fn default() -> Self {
        Self::new()
    }
}

/// Hash a JSON value directly into the hasher with keys sorted at each nesting level.
///
/// The value tree is walked once, feeding data straight into the hasher without
/// building a canonical JSON string. Each variant is prefixed with a type discriminant
/// tag so that `"42"` (string) and `42` (number) produce different hashes, and arrays
/// and objects are prefixed with their length.
///
/// Numbers are hashed via their `Display` text, streamed into the hasher without an
/// intermediate `String`; this treats i64, u64 and f64 uniformly.
///
/// Defensive measure: object entries are collected into a `Vec` and sorted by key before
/// hashing, ensuring deterministic output even if `preserve_order` is enabled by a
/// transitive dep.
fn hash_value(value: &serde_json::Value, hasher: &mut impl Hasher) {
    /// Adapter that forwards formatted text to a hasher instead of a buffer.
    struct HashWriter<'a, H: Hasher>(&'a mut H);

    impl<H: Hasher> std::fmt::Write for HashWriter<'_, H> {
        fn write_str(&mut self, s: &str) -> std::fmt::Result {
            self.0.write(s.as_bytes());
            Ok(())
        }
    }

    match value {
        serde_json::Value::Null => 0u8.hash(hasher),
        serde_json::Value::Bool(b) => {
            1u8.hash(hasher);
            b.hash(hasher);
        }
        serde_json::Value::Number(n) => {
            2u8.hash(hasher);
            // `HashWriter::write_str` never returns an error, so the result carries no
            // information and is deliberately ignored.
            let _ = std::fmt::Write::write_fmt(
                &mut HashWriter(&mut *hasher),
                format_args!("{n}"),
            );
            // Terminator byte so the number's text cannot run into whatever is hashed next.
            hasher.write_u8(0xff);
        }
        serde_json::Value::String(s) => {
            3u8.hash(hasher);
            s.hash(hasher);
        }
        serde_json::Value::Array(arr) => {
            4u8.hash(hasher);
            arr.len().hash(hasher);
            for item in arr {
                hash_value(item, hasher);
            }
        }
        serde_json::Value::Object(map) => {
            5u8.hash(hasher);
            map.len().hash(hasher);
            // Sort entries by key for deterministic hashing regardless of insertion
            // order. Keys are unique within an object, so an unstable sort is enough.
            let mut entries: Vec<(&String, &serde_json::Value)> = map.iter().collect();
            entries.sort_unstable_by(|a, b| a.0.cmp(b.0));
            for (key, child) in entries {
                key.hash(hasher);
                hash_value(child, hasher);
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Sleep long enough for an entry recorded with the 100ms TTL to expire.
    fn wait_past_ttl() {
        std::thread::sleep(Duration::from_millis(150));
    }

    /// Build a JSON object of string fields, inserting them in the order given.
    fn object_of(pairs: &[(&str, &str)]) -> serde_json::Value {
        let mut fields = serde_json::Map::new();
        for &(name, text) in pairs {
            fields.insert(name.to_string(), json!(text));
        }
        serde_json::Value::Object(fields)
    }

    /// Run `hash_value` over `value` with a fresh hasher and return the digest.
    fn digest(value: &serde_json::Value) -> u64 {
        let mut state = DefaultHasher::new();
        hash_value(value, &mut state);
        state.finish()
    }

    #[test]
    fn test_first_event_is_not_duplicate() {
        let dedup = DedupStore::new();
        let payload = json!({"command": "ls -la"});

        assert!(
            !dedup.check_and_insert("session-1", "bash", &payload),
            "first occurrence must not be a duplicate"
        );
    }

    #[test]
    fn test_same_event_within_ttl_is_duplicate() {
        let dedup = DedupStore::new();
        let payload = json!({"command": "ls -la"});

        // Two back-to-back submissions: the second lands well inside the 100ms TTL.
        let initial = dedup.check_and_insert("session-1", "bash", &payload);
        let repeat = dedup.check_and_insert("session-1", "bash", &payload);

        assert!(!initial, "first occurrence must not be a duplicate");
        assert!(repeat, "immediate repeat must be detected as duplicate");
    }

    #[test]
    fn test_same_event_after_ttl_is_not_duplicate() {
        let dedup = DedupStore::new();
        let payload = json!({"command": "ls -la"});

        dedup.check_and_insert("session-1", "bash", &payload);
        wait_past_ttl();

        let resubmitted = dedup.check_and_insert("session-1", "bash", &payload);
        assert!(
            !resubmitted,
            "event after TTL expiry must not be a duplicate"
        );
    }

    #[test]
    fn test_different_events_are_not_duplicates() {
        let dedup = DedupStore::new();
        let list_call = json!({"cmd": "ls"});
        let read_call = json!({"path": "/etc/hosts"});

        let list_seen = dedup.check_and_insert("session-1", "bash", &list_call);
        let read_seen = dedup.check_and_insert("session-1", "read_file", &read_call);

        assert!(!list_seen, "first event must not be a duplicate");
        assert!(!read_seen, "different event must not be a duplicate");
    }

    #[test]
    fn test_different_sessions_same_tool_not_duplicate() {
        let dedup = DedupStore::new();
        let payload = json!({"command": "ls"});

        dedup.check_and_insert("session-A", "bash", &payload);
        let other_session = dedup.check_and_insert("session-B", "bash", &payload);

        assert!(
            !other_session,
            "same tool call in different session must not be a duplicate"
        );
    }

    #[test]
    fn test_evict_expired_removes_old_entries() {
        let dedup = DedupStore::new();
        let payload = json!({"key": "value"});

        dedup.check_and_insert("session-1", "bash", &payload);

        // An immediate repeat being flagged proves the entry is present in the map.
        let present = dedup.check_and_insert("session-1", "bash", &payload);
        assert!(present, "entry must exist before eviction");

        // Let the entry age out, then sweep it.
        wait_past_ttl();
        dedup.evict_expired();

        // With the entry gone, the same event counts as new again.
        let seen_again = dedup.check_and_insert("session-1", "bash", &payload);
        assert!(
            !seen_again,
            "evicted entry must not be a duplicate on next check"
        );
    }

    #[test]
    fn test_dedup_key_order_independent() {
        // The same two fields inserted in opposite orders describe one logical event,
        // so the second submission must collapse onto the first.
        let dedup = DedupStore::new();
        let command_first = object_of(&[("command", "ls -la"), ("path", "/tmp")]);
        let path_first = object_of(&[("path", "/tmp"), ("command", "ls -la")]);

        let initial = dedup.check_and_insert("session-1", "bash", &command_first);
        let reordered = dedup.check_and_insert("session-1", "bash", &path_first);

        assert!(!initial, "first occurrence must not be a duplicate");
        assert!(
            reordered,
            "same logical event with different key order must be detected as duplicate"
        );
    }

    #[test]
    fn test_hash_value_sorts_nested_keys() {
        // Identical content, with key order shuffled at the top level, inside a nested
        // object, and inside an object nested in an array.
        let shuffled = json!({"z": {"b": 2, "a": 1}, "a": [{"y": 1, "x": 2}]});
        let ordered = json!({"a": [{"x": 2, "y": 1}], "z": {"a": 1, "b": 2}});

        assert_eq!(
            digest(&shuffled),
            digest(&ordered),
            "nested keys in different order must produce the same hash"
        );
    }
}