use async_trait::async_trait;
use harness_core::{Memory, MemoryEntry, MemoryError};
use regex::Regex;
use std::collections::HashSet;
use std::sync::Arc;
/// A [`Memory`] wrapper that filters writes before they reach the wrapped store.
///
/// Entries whose content looks sensitive, or that are near-duplicates of
/// entries the wrapped store already returns, are dropped instead of written.
pub struct GuardedMemory {
/// The wrapped store; recalls and accepted writes are forwarded to it.
inner: Arc<dyn Memory>,
/// Content matching any of these regexes is treated as sensitive.
sensitivity_patterns: Vec<Regex>,
/// Lower-cased needles; content containing any of them (compared
/// case-insensitively via lower-casing) is treated as sensitive.
blocked_substrings: Vec<String>,
/// Jaccard similarity at or above which a new entry counts as a duplicate.
/// A value `<= 0.0` disables the duplicate check.
dedup_threshold: f32,
/// How many candidates are requested from `inner.recall` per duplicate check.
dedup_recall_k: usize,
}
impl GuardedMemory {
    /// Wraps `inner` with the default guards.
    ///
    /// Defaults: the built-in sensitivity patterns, no blocked substrings, a
    /// dedup threshold of `0.6`, and `5` recall candidates per dedup check.
    pub fn new(inner: Arc<dyn Memory>) -> Self {
        Self {
            inner,
            sensitivity_patterns: default_sensitivity_patterns(),
            blocked_substrings: Vec::new(),
            dedup_threshold: 0.6,
            dedup_recall_k: 5,
        }
    }

    /// Removes every sensitivity pattern registered so far.
    ///
    /// This clears the whole list, so patterns added earlier in the builder
    /// chain via [`with_sensitivity_pattern`](Self::with_sensitivity_pattern)
    /// are removed together with the defaults.
    pub fn without_default_sensitivity(mut self) -> Self {
        self.sensitivity_patterns.clear();
        self
    }

    /// Adds a regex; content matching it is treated as sensitive.
    ///
    /// # Errors
    ///
    /// Returns the [`regex::Error`] produced when `pat` fails to compile.
    pub fn with_sensitivity_pattern(mut self, pat: impl AsRef<str>) -> Result<Self, regex::Error> {
        self.sensitivity_patterns.push(Regex::new(pat.as_ref())?);
        Ok(self)
    }

    /// Adds a case-insensitive blocked substring.
    ///
    /// The needle is stored lower-cased and compared against lower-cased
    /// content. An empty needle is ignored.
    pub fn with_blocked_substring(mut self, s: impl Into<String>) -> Self {
        let needle = s.into().to_lowercase();
        // Every string contains the empty string, so storing an empty needle
        // would make the guard silently drop every single write.
        if !needle.is_empty() {
            self.blocked_substrings.push(needle);
        }
        self
    }

    /// Sets the Jaccard similarity threshold, clamped to `0.0..=1.0`.
    ///
    /// A threshold of `0.0` disables the duplicate check.
    pub fn with_dedup_threshold(mut self, t: f32) -> Self {
        self.dedup_threshold = t.clamp(0.0, 1.0);
        self
    }

    /// Sets how many candidates are recalled per duplicate check (at least 1).
    pub fn with_dedup_recall_k(mut self, k: usize) -> Self {
        self.dedup_recall_k = k.max(1);
        self
    }

    /// Returns `true` when `content` contains a blocked substring or matches
    /// any sensitivity pattern.
    fn is_sensitive(&self, content: &str) -> bool {
        // Only pay for the lower-cased copy when there is something to compare
        // it against; by default there are no blocked substrings.
        if !self.blocked_substrings.is_empty() {
            let lower = content.to_lowercase();
            if self
                .blocked_substrings
                .iter()
                .any(|needle| lower.contains(needle.as_str()))
            {
                return true;
            }
        }
        self.sensitivity_patterns
            .iter()
            .any(|pattern| pattern.is_match(content))
    }

    /// Returns `true` when an entry recalled from the wrapped store is at
    /// least `dedup_threshold` similar (token Jaccard) to `entry`.
    ///
    /// The check is best-effort: a recall error, or an entry that yields no
    /// tokens, is reported as "not a duplicate".
    async fn is_duplicate(&self, entry: &MemoryEntry) -> bool {
        if self.dedup_threshold <= 0.0 {
            return false;
        }
        // Tokenize first: without tokens nothing can be compared, so the
        // recall round-trip to the wrapped store would be wasted.
        let new_tokens = jaccard_tokens(&entry.content);
        if new_tokens.is_empty() {
            return false;
        }
        let cands = match self.inner.recall(&entry.content, self.dedup_recall_k).await {
            Ok(found) => found,
            Err(_) => return false,
        };
        cands.iter().any(|cand| {
            jaccard(&new_tokens, &jaccard_tokens(&cand.content)) >= self.dedup_threshold
        })
    }
}
#[async_trait]
impl Memory for GuardedMemory {
    /// Forwards the query to the wrapped store unchanged; reads are not filtered.
    async fn recall(&self, query: &str, k: usize) -> Result<Vec<MemoryEntry>, MemoryError> {
        self.inner.recall(query, k).await
    }

    /// Writes `entry` to the wrapped store unless a guard rejects it.
    ///
    /// Sensitive or duplicate entries are dropped and `Ok(())` is still
    /// returned, so the caller cannot tell a dropped write from a stored one.
    ///
    /// # Errors
    ///
    /// Propagates the error of the wrapped store's `write` for entries that
    /// pass both guards.
    async fn write(&self, entry: MemoryEntry) -> Result<(), MemoryError> {
        if self.is_sensitive(&entry.content) {
            // Never echo the content here: it was just classified as sensitive,
            // and a preview would move the secret from memory into the logs.
            tracing::info!(
                content_len = entry.content.len(),
                "guarded memory: dropping sensitive entry"
            );
            return Ok(());
        }
        if self.is_duplicate(&entry).await {
            // This content already passed the sensitivity check, so a short
            // preview is acceptable for diagnostics.
            tracing::info!(
                content_preview = %entry.content.chars().take(40).collect::<String>(),
                "guarded memory: dropping duplicate entry"
            );
            return Ok(());
        }
        self.inner.write(entry).await
    }
}
/// Builds the built-in sensitivity regexes.
///
/// A pattern that fails to compile is skipped without any report.
///
/// NOTE(review): `\b` in the `regex` crate is Unicode-aware by default, so a
/// digit run written directly next to CJK letters (no space in between) may
/// not sit on a word boundary and would then not match — confirm this is
/// acceptable.
fn default_sensitivity_patterns() -> Vec<Regex> {
    const SOURCES: [&str; 5] = [
        // Stand-alone run of 13 to 19 digits.
        r"\b\d{13,19}\b",
        // E-mail-shaped text.
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        // 11 digits starting with 1 followed by 3-9 (presumably mainland-China
        // mobile numbers — TODO confirm).
        r"\b1[3-9]\d{9}\b",
        // Currency symbol followed by an amount.
        r"[¥$€£₹]\s?\d+(?:[.,]\d+)?",
        // Currency code followed by an amount.
        r"\b(?:USD|CNY|EUR|RMB|HKD|JPY)\s?\d+(?:[.,]\d+)?\b",
    ];

    let mut compiled = Vec::with_capacity(SOURCES.len());
    for source in SOURCES {
        if let Ok(pattern) = Regex::new(source) {
            compiled.push(pattern);
        }
    }
    compiled
}
/// Splits `s` into a set of lower-cased tokens for similarity comparison.
///
/// Tokens are maximal runs of alphanumeric characters. A token is kept only
/// when it is at least 3 bytes long in UTF-8, so a two-letter ASCII word is
/// discarded while a two-character CJK word is kept.
fn jaccard_tokens(s: &str) -> HashSet<String> {
    let lowered = s.to_lowercase();
    let mut tokens = HashSet::new();
    for word in lowered.split(|ch: char| !ch.is_alphanumeric()) {
        if word.len() < 3 {
            continue;
        }
        tokens.insert(word.to_owned());
    }
    tokens
}
/// Jaccard similarity of two token sets: `|a ∩ b| / |a ∪ b|`.
///
/// Returns `0.0` when either set is empty.
fn jaccard(a: &HashSet<String>, b: &HashSet<String>) -> f32 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let shared = a.iter().filter(|token| b.contains(*token)).count();
    // Inclusion–exclusion gives the union size; it is non-zero here because
    // both sets are non-empty.
    let combined = a.len() + b.len() - shared;
    shared as f32 / combined as f32
}
#[cfg(test)]
mod tests {
    use super::*;
    use harness_core::Memory;
    use std::sync::Mutex;

    /// In-memory store used as the wrapped backend in these tests.
    #[derive(Default)]
    struct VecMemory {
        store: Mutex<Vec<MemoryEntry>>,
    }

    #[async_trait]
    impl Memory for VecMemory {
        /// Ranks entries by how many query tokens their lower-cased content
        /// contains; a query without tokens returns the first `k` entries.
        async fn recall(&self, query: &str, k: usize) -> Result<Vec<MemoryEntry>, MemoryError> {
            let entries = self.store.lock().unwrap();
            let needles = jaccard_tokens(query);
            if needles.is_empty() {
                return Ok(entries.iter().take(k).cloned().collect());
            }

            let mut ranked: Vec<(u32, &MemoryEntry)> = Vec::new();
            for entry in entries.iter() {
                let haystack = entry.content.to_lowercase();
                let hits: u32 = needles
                    .iter()
                    .map(|needle| u32::from(haystack.contains(needle.as_str())))
                    .sum();
                if hits > 0 {
                    ranked.push((hits, entry));
                }
            }
            // Stable sort, most hits first; ties keep insertion order.
            ranked.sort_by_key(|&(hits, _)| std::cmp::Reverse(hits));
            Ok(ranked
                .into_iter()
                .take(k)
                .map(|(_, entry)| entry.clone())
                .collect())
        }

        async fn write(&self, entry: MemoryEntry) -> Result<(), MemoryError> {
            self.store.lock().unwrap().push(entry);
            Ok(())
        }
    }

    /// Returns an empty backing store plus a default guard wrapping it.
    fn fixture() -> (Arc<dyn Memory>, GuardedMemory) {
        let backing: Arc<dyn Memory> = Arc::new(VecMemory::default());
        let guarded = GuardedMemory::new(Arc::clone(&backing));
        (backing, guarded)
    }

    #[tokio::test]
    async fn sensitive_credit_card_is_dropped() {
        let (inner, mem) = fixture();
        let entry = MemoryEntry::new("user's card is 4111111111111111 expiry 12/30");
        mem.write(entry).await.unwrap();

        let stored = inner.recall("card", 10).await.unwrap();
        assert!(
            stored.is_empty(),
            "credit-card-like content should be dropped"
        );
    }

    #[tokio::test]
    async fn sensitive_email_is_dropped() {
        let (inner, mem) = fixture();
        let entry = MemoryEntry::new("user's email is ll_faw@hotmail.com");
        mem.write(entry).await.unwrap();

        let stored = inner.recall("email", 10).await.unwrap();
        assert!(stored.is_empty());
    }

    #[tokio::test]
    async fn monetary_amounts_are_dropped() {
        let (inner, mem) = fixture();
        let symbol_amount = MemoryEntry::new("用户记录了一笔 ¥199 火锅消费");
        mem.write(symbol_amount).await.unwrap();
        let code_amount = MemoryEntry::new("user spent USD 250 on Claude Code");
        mem.write(code_amount).await.unwrap();

        let all = inner.recall("user", 10).await.unwrap();
        assert!(
            all.is_empty(),
            "monetary patterns should be filtered: {all:?}"
        );
    }

    #[tokio::test]
    async fn durable_preferences_pass_through() {
        let (inner, mem) = fixture();
        let chinese_pref = MemoryEntry::new("用户偏好使用微信支付餐饮类支出");
        mem.write(chinese_pref).await.unwrap();
        let english_pref = MemoryEntry::new("user prefers concise replies in Slack style");
        mem.write(english_pref).await.unwrap();

        let stored = inner.recall("用户", 10).await.unwrap();
        assert_eq!(stored.len(), 1, "preference about 用户 should be kept");
    }

    #[tokio::test]
    async fn duplicate_is_dropped() {
        let (inner, mem) = fixture();
        let mem = mem.with_dedup_threshold(0.6);
        let first = MemoryEntry::new("user prefers concise replies written in Slack style");
        mem.write(first).await.unwrap();
        let near_copy = MemoryEntry::new("user prefers concise replies in Slack tone");
        mem.write(near_copy).await.unwrap();

        let all = inner.recall("user", 10).await.unwrap();
        assert_eq!(
            all.len(),
            1,
            "near-duplicate should not double-store: {all:?}"
        );
    }

    #[tokio::test]
    async fn blocked_substring_works() {
        let (inner, mem) = fixture();
        let mem = mem.with_blocked_substring("password");
        let entry = MemoryEntry::new("user's password reset is hunter2");
        mem.write(entry).await.unwrap();

        let stored = inner.recall("password", 10).await.unwrap();
        assert!(stored.is_empty());
    }
}