use crate::error::Result;
use regex::Regex;
/// Category of sensitive data recognised by the content guard.
///
/// The [`Display`](std::fmt::Display) form is a stable, lowercase
/// `snake_case` identifier (for example `phone_cn`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PiiType {
    /// Chinese mobile phone number; displayed as `phone_cn`.
    PhoneCn,
    /// Chinese resident ID card number; displayed as `id_card_cn`.
    IdCardCn,
    /// E-mail address; displayed as `email`.
    Email,
    /// API key or access token; displayed as `api_key`.
    ApiKey,
    /// Payment card number; displayed as `credit_card`.
    CreditCard,
}

impl std::fmt::Display for PiiType {
    /// Writes the lowercase identifier of this category, without padding.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            PiiType::PhoneCn => "phone_cn",
            PiiType::IdCardCn => "id_card_cn",
            PiiType::Email => "email",
            PiiType::ApiKey => "api_key",
            PiiType::CreditCard => "credit_card",
        };
        f.write_str(label)
    }
}
/// One occurrence of sensitive data located in a scanned text.
#[derive(Clone, Debug)]
pub struct PiiMatch {
    /// Category of the pattern that produced this match.
    pub pii_type: PiiType,
    /// Offset in the scanned text where the match begins (as reported by the regex match).
    pub start: usize,
    /// Offset in the scanned text just past the end of the match.
    pub end: usize,
    /// Owned copy of the matched text.
    pub matched: String,
}
/// Policy applied by the content guard's `check` when sensitive data is found.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ContentGuardMode {
    /// Report the categories that were found.
    Detect,
    /// Produce a copy of the text with every finding replaced by a placeholder.
    Redact,
    /// Report the categories that were found and mark the content as rejected.
    Reject,
}
pub struct ContentGuard {
patterns: Vec<(PiiType, Regex)>,
mode: ContentGuardMode,
}
impl ContentGuard {
pub fn new(mode: ContentGuardMode) -> Self {
Self {
patterns: build_default_patterns(),
mode,
}
}
pub fn detect(&self, content: &str) -> Vec<PiiMatch> {
let mut matches = Vec::new();
for (pii_type, re) in &self.patterns {
for m in re.find_iter(content) {
matches.push(PiiMatch {
pii_type: pii_type.clone(),
start: m.start(),
end: m.end(),
matched: m.as_str().to_string(),
});
}
}
matches.sort_by_key(|a| a.start);
matches
}
pub fn redact(&self, content: &str) -> String {
let mut result = content.to_string();
let matches = self.detect(content);
for m in matches.iter().rev() {
let replacement = match m.pii_type {
PiiType::ApiKey => "[REDACTED:API_KEY]",
PiiType::PhoneCn => "[REDACTED:PHONE]",
PiiType::Email => "[REDACTED:EMAIL]",
PiiType::IdCardCn => "[REDACTED:ID_CARD]",
PiiType::CreditCard => "[REDACTED:CARD]",
};
result.replace_range(m.start..m.end, replacement);
}
result
}
pub fn is_clean(&self, content: &str) -> bool {
self.detect(content).is_empty()
}
pub fn check(&self, content: &str) -> Result<ContentGuardResult> {
let matches = self.detect(content);
if matches.is_empty() {
return Ok(ContentGuardResult::Pass);
}
match self.mode {
ContentGuardMode::Detect => {
let types: Vec<String> = matches.iter().map(|m| m.pii_type.to_string()).collect();
Ok(ContentGuardResult::Detected { pii_types: types })
}
ContentGuardMode::Reject => {
let types: Vec<String> = matches.iter().map(|m| m.pii_type.to_string()).collect();
Ok(ContentGuardResult::Rejected { pii_types: types })
}
ContentGuardMode::Redact => {
let redacted = self.redact(content);
Ok(ContentGuardResult::Redacted(redacted))
}
}
}
}
/// Outcome of a content guard check.
#[derive(Clone, Debug)]
pub enum ContentGuardResult {
    /// No sensitive data was found.
    Pass,
    /// Sensitive data was found and is only being reported.
    Detected {
        /// Display names of the categories found.
        pii_types: Vec<String>,
    },
    /// Sensitive data was found and the content is refused.
    Rejected {
        /// Display names of the categories found.
        pii_types: Vec<String>,
    },
    /// Sensitive data was found; the payload is the redacted text.
    Redacted(String),
}

impl ContentGuardResult {
    /// Returns `true` only for the [`ContentGuardResult::Rejected`] variant.
    pub fn is_rejected(&self) -> bool {
        match self {
            ContentGuardResult::Rejected { .. } => true,
            ContentGuardResult::Pass
            | ContentGuardResult::Detected { .. }
            | ContentGuardResult::Redacted(_) => false,
        }
    }
}
/// Compiles the built-in detectors, in the order they are applied.
///
/// NOTE(review): unlike the other entries, the phone pattern carries no `\b`
/// anchors, so it can also match an 11-digit run inside a longer digit
/// sequence and overlap with matches of other patterns.
///
/// # Panics
///
/// Panics if one of the hard-coded patterns fails to compile.
fn build_default_patterns() -> Vec<(PiiType, Regex)> {
    let sources = vec![
        (PiiType::PhoneCn, r"1[3-9]\d{9}"),
        (
            PiiType::IdCardCn,
            r"\b[1-9]\d{5}(?:19|20)\d{2}(?:0[1-9]|1[0-2])(?:0[1-9]|[12]\d|3[01])\d{3}[\dXx]\b",
        ),
        (
            PiiType::Email,
            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
        ),
        (
            PiiType::ApiKey,
            r"\b(sk-(?:ant-)?[A-Za-z0-9]{16,}|ghp_[A-Za-z0-9]{36,}|xai-[A-Za-z0-9]{16,})\b",
        ),
        (
            PiiType::CreditCard,
            r"\b[45]\d{3}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b",
        ),
    ];

    let mut compiled = Vec::with_capacity(sources.len());
    for (kind, source) in sources {
        compiled.push((kind, Regex::new(source).unwrap()));
    }
    compiled
}
impl crate::guard::Guard for ContentGuard {
fn name(&self) -> &str {
"content_guard"
}
fn check<'a>(
&'a self,
content: &'a str,
_direction: crate::guard::GuardDirection,
) -> futures::future::BoxFuture<'a, crate::error::Result<crate::guard::GuardResult>> {
Box::pin(async move {
let result = self.check(content)?;
Ok(match result {
ContentGuardResult::Pass => crate::guard::GuardResult::Pass,
ContentGuardResult::Detected { pii_types } => crate::guard::GuardResult::Warn {
reasons: vec![format!("检测到敏感信息: {}", pii_types.join(", "))],
},
ContentGuardResult::Rejected { pii_types } => crate::guard::GuardResult::Block {
reason: format!("内容包含敏感信息: {}", pii_types.join(", ")),
},
ContentGuardResult::Redacted(_redacted) => {
tracing::warn!(
pii_found = true,
"内容已脱敏,但在 GuardManager 中回传脱敏文本需调用方另外获取"
);
crate::guard::GuardResult::Warn {
reasons: vec!["内容包含敏感信息,已脱敏处理".to_string()],
}
}
})
})
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans `text` with a detect-mode guard and returns the categories found, in match order.
    fn detected_types(text: &str) -> Vec<PiiType> {
        ContentGuard::new(ContentGuardMode::Detect)
            .detect(text)
            .into_iter()
            .map(|m| m.pii_type)
            .collect()
    }

    #[test]
    fn test_detect_phone() {
        let found = detected_types("请联系 13812345678 获取帮助");
        assert_eq!(found, vec![PiiType::PhoneCn]);
    }

    #[test]
    fn test_detect_email() {
        let found = detected_types("邮箱: user@example.com");
        assert_eq!(found, vec![PiiType::Email]);
    }

    #[test]
    fn test_detect_api_key() {
        let found = detected_types("export OPENAI_API_KEY=sk-proj1234567890abcdef");
        assert_eq!(found, vec![PiiType::ApiKey]);
    }

    #[test]
    fn test_redact() {
        let output = ContentGuard::new(ContentGuardMode::Redact)
            .redact("电话: 13812345678, 邮箱: user@example.com");
        // The raw values must be gone...
        for secret in ["13812345678", "user@example.com"] {
            assert!(!output.contains(secret));
        }
        // ...and replaced by their placeholders.
        for placeholder in ["[REDACTED:PHONE]", "[REDACTED:EMAIL]"] {
            assert!(output.contains(placeholder));
        }
    }

    #[test]
    fn test_reject_mode() {
        let verdict = ContentGuard::new(ContentGuardMode::Reject)
            .check("我的邮箱是 test@gmail.com")
            .unwrap();
        assert!(verdict.is_rejected());
    }

    #[test]
    fn test_clean_content_passes() {
        let verdict = ContentGuard::new(ContentGuardMode::Reject)
            .check("这是一段正常的内容没有敏感信息")
            .unwrap();
        assert!(!verdict.is_rejected());
    }
}