use crate::label::PrivacyLabel;
/// Tokenizer format a [`ModelDescriptor`] asks for.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TokenizerSpec {
    /// Hugging Face JSON tokenizer.
    ///
    /// NOTE(review): presumably a `tokenizer.json` file — confirm against the loader.
    HuggingFaceJson,
}
/// Post-processing scheme a [`ModelDescriptor`] selects for its raw model output.
///
/// NOTE(review): the variant names suggest sequence-tagging schemes (BIOES / BIO / IOB);
/// the decoding itself lives outside this file — confirm semantics there.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PostProcessorKind {
    /// Returned by `OpenAIPrivacyFilterDescriptor` in this file.
    Bioes,
    /// Returned by `XlmrPiiDescriptor` and `YonigoPiiDescriptor` in this file.
    Bio,
    // No descriptor in this file constructs this variant, hence the lint opt-out.
    #[allow(dead_code)]
    Iob,
    // No descriptor in this file constructs this variant, hence the lint opt-out.
    #[allow(dead_code)]
    PerTokenMax,
}
/// Per-label threshold table.
///
/// A label that is absent from the map has no threshold configured in this profile.
/// NOTE(review): how a threshold is applied is not visible here; the tests in this file
/// describe values above `1.0` as masking a label.
#[derive(Clone, Debug, Default)]
pub struct ThresholdProfile {
    /// Threshold keyed by canonical label.
    pub thresholds: std::collections::BTreeMap<PrivacyLabel, f32>,
}
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct LangConditionalThresholdProfile {
pub default: ThresholdProfile,
pub overrides: std::collections::BTreeMap<(String, PrivacyLabel), f32>,
}
impl LangConditionalThresholdProfile {
pub fn new(default: ThresholdProfile) -> Self {
Self {
default,
overrides: std::collections::BTreeMap::new(),
}
}
pub fn with_override(
mut self,
lang: impl Into<String>,
label: PrivacyLabel,
threshold: f32,
) -> Self {
self.overrides.insert((lang.into(), label), threshold);
self
}
pub fn threshold_for(&self, label: PrivacyLabel, lang: Option<&str>) -> Option<f32> {
if let Some(l) = lang {
if let Some(t) = self.overrides.get(&(l.to_string(), label)) {
return Some(*t);
}
}
self.default.thresholds.get(&label).copied()
}
}
pub static XLMR_LANG_CONDITIONAL_PROFILE: once_cell::sync::Lazy<LangConditionalThresholdProfile> =
once_cell::sync::Lazy::new(|| {
let default = (*XLMR_PROFILE).clone();
LangConditionalThresholdProfile::new(default)
.with_override("it", PrivacyLabel::AccountNumber, 1.1) .with_override("de", PrivacyLabel::Person, 1.1) .with_override("fr", PrivacyLabel::AccountNumber, 1.1) .with_override("de", PrivacyLabel::AccountNumber, 1.1) .with_override("en", PrivacyLabel::Person, 1.1) .with_override("zh", PrivacyLabel::AccountNumber, 1.1) });
/// Static description of one model: its identity, native label set, mapping onto
/// [`PrivacyLabel`], and the tokenizer / post-processing / threshold settings that go with it.
///
/// The `Send + Sync` supertraits let descriptors be shared across threads.
pub trait ModelDescriptor: Send + Sync {
    /// Identifier of the model (for example `"xlmr-pii-v1"`).
    fn model_id(&self) -> &str;

    /// Version string reported by the descriptor.
    fn version(&self) -> &str;

    /// Version tag of the canonical label space the mapping targets.
    fn label_space_version(&self) -> &str;

    /// Native label names of the model.
    ///
    /// NOTE(review): presumably indexed by the model's class id — confirm with the caller.
    fn id2label(&self) -> &[&'static str];

    /// Maps a native label onto a canonical [`PrivacyLabel`]; `None` means "no mapping".
    fn canonical_mapping(&self, native_label: &str) -> Option<PrivacyLabel>;

    /// Tokenizer format this model expects.
    fn tokenizer_spec(&self) -> TokenizerSpec;

    /// Post-processing scheme to apply to the model output.
    fn post_processor(&self) -> PostProcessorKind;

    /// Per-label thresholds for this model; the default is no profile at all.
    fn threshold_profile(&self) -> Option<&ThresholdProfile> {
        None
    }

    /// Language-conditional thresholds for this model; the default is none.
    fn lang_conditional_profile(&self) -> Option<&LangConditionalThresholdProfile> {
        None
    }

    /// File name (possibly with a relative directory) of the ONNX model.
    ///
    /// Defaults to `model_q4f16.onnx`.
    fn onnx_filename(&self) -> &str {
        "model_q4f16.onnx"
    }
}
/// Descriptor for the model identified as `openai-privacy-filter-v1`.
#[derive(Debug)]
pub struct OpenAIPrivacyFilterDescriptor;

impl OpenAIPrivacyFilterDescriptor {
    /// Native label names exposed through `ModelDescriptor::id2label`.
    ///
    /// The order is part of the data and must not be changed.
    const NATIVE_LABELS: &'static [&'static str] = &[
        // `private_`-prefixed names plus `secret`.
        "private_person",
        "private_email",
        "private_phone",
        "private_address",
        "private_date",
        "private_url",
        "private_account_number",
        "secret",
        // The same kinds without the `private_` prefix.
        "person",
        "email",
        "phone",
        "address",
        "date",
        "url",
        "account_number",
    ];
}
impl ModelDescriptor for OpenAIPrivacyFilterDescriptor {
    fn model_id(&self) -> &str {
        "openai-privacy-filter-v1"
    }

    fn version(&self) -> &str {
        "1.0.0"
    }

    fn label_space_version(&self) -> &str {
        "8class-v1"
    }

    fn id2label(&self) -> &[&'static str] {
        Self::NATIVE_LABELS
    }

    /// Resolves a native label through `PrivacyLabel::from_kind`.
    ///
    /// The lower-cased form with `-` and spaces turned into `_` is tried first; if that
    /// is not recognised, the label is tried again exactly as given.
    fn canonical_mapping(&self, native_label: &str) -> Option<PrivacyLabel> {
        let normalized = native_label.to_lowercase().replace(['-', ' '], "_");
        if let Some(label) = PrivacyLabel::from_kind(&normalized) {
            return Some(label);
        }
        PrivacyLabel::from_kind(native_label)
    }

    fn tokenizer_spec(&self) -> TokenizerSpec {
        TokenizerSpec::HuggingFaceJson
    }

    fn post_processor(&self) -> PostProcessorKind {
        PostProcessorKind::Bioes
    }

    /// Explicitly no threshold profile for this model (same as the trait default, spelled
    /// out on purpose; a test in this file pins it).
    fn threshold_profile(&self) -> Option<&ThresholdProfile> {
        None
    }
}
/// Explicit choice of XLM-R threshold profile, used by `XlmrPiiDescriptor::with_mode`.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum XlmrProfileMode {
    /// Selects `XLMR_PROFILE`.
    Default,
    /// Selects `XLMR_PROFILE_FP_STRICT`.
    FpStrict,
}
/// Descriptor for the model identified as `xlmr-pii-v1`.
///
/// `Default::default()` leaves the profile mode unset; in that case the threshold profile
/// is picked from the environment when it is requested.
#[derive(Clone, Copy, Debug, Default)]
pub struct XlmrPiiDescriptor {
    /// `Some` pins the threshold profile; `None` defers to `xlmr_profile_from_env`.
    mode: Option<XlmrProfileMode>,
}

impl XlmrPiiDescriptor {
    /// Native label names exposed through `ModelDescriptor::id2label`.
    ///
    /// The order is part of the data and must not be changed. The group comments follow
    /// this descriptor's `canonical_mapping`.
    const NATIVE_LABELS: &'static [&'static str] = &[
        // -> Person
        "GIVENNAME",
        "SURNAME",
        "TITLE",
        // -> Email
        "EMAIL",
        // -> Phone
        "TELEPHONENUM",
        // -> Address
        "STREET",
        "BUILDINGNUM",
        "CITY",
        "ZIPCODE",
        // -> Date
        "DATE",
        "TIME",
        // -> AccountNumber
        "IDCARDNUM",
        "PASSPORTNUM",
        "DRIVERLICENSENUM",
        "CREDITCARDNUMBER",
        "SOCIALNUM",
        "TAXNUM",
        // intentionally unmapped
        "AGE",
        "GENDER",
        "SEX",
    ];

    /// Creates a descriptor whose threshold profile is fixed to `mode`, independent of the
    /// environment.
    pub const fn with_mode(mode: XlmrProfileMode) -> Self {
        Self { mode: Some(mode) }
    }
}
impl ModelDescriptor for XlmrPiiDescriptor {
    fn model_id(&self) -> &str {
        "xlmr-pii-v1"
    }

    fn version(&self) -> &str {
        "1.0.0"
    }

    fn label_space_version(&self) -> &str {
        "8class-v1"
    }

    fn id2label(&self) -> &[&'static str] {
        Self::NATIVE_LABELS
    }

    /// Maps a native label (exact, case-sensitive match) onto its canonical label.
    ///
    /// `AGE`, `GENDER` and `SEX` are deliberately unmapped; any unknown label is unmapped too.
    fn canonical_mapping(&self, native_label: &str) -> Option<PrivacyLabel> {
        let canonical = match native_label {
            "GIVENNAME" | "SURNAME" | "TITLE" => PrivacyLabel::Person,
            "EMAIL" => PrivacyLabel::Email,
            "TELEPHONENUM" => PrivacyLabel::Phone,
            "STREET" | "BUILDINGNUM" | "CITY" | "ZIPCODE" => PrivacyLabel::Address,
            "DATE" | "TIME" => PrivacyLabel::Date,
            "IDCARDNUM" | "PASSPORTNUM" | "DRIVERLICENSENUM" | "CREDITCARDNUMBER"
            | "SOCIALNUM" | "TAXNUM" => PrivacyLabel::AccountNumber,
            // Known native labels that have no canonical counterpart.
            "AGE" | "GENDER" | "SEX" => return None,
            _ => return None,
        };
        Some(canonical)
    }

    fn tokenizer_spec(&self) -> TokenizerSpec {
        TokenizerSpec::HuggingFaceJson
    }

    fn post_processor(&self) -> PostProcessorKind {
        PostProcessorKind::Bio
    }

    /// The model file sits in an `onnx/` subdirectory.
    fn onnx_filename(&self) -> &str {
        "onnx/model_q4f16.onnx"
    }

    /// Always returns a profile: the pinned one when a mode was given via `with_mode`,
    /// otherwise whichever one the environment selects at the time of the call.
    fn threshold_profile(&self) -> Option<&ThresholdProfile> {
        let profile: &ThresholdProfile = match self.mode {
            None => xlmr_profile_from_env(),
            Some(XlmrProfileMode::FpStrict) => &XLMR_PROFILE_FP_STRICT,
            Some(XlmrProfileMode::Default) => &XLMR_PROFILE,
        };
        Some(profile)
    }

    fn lang_conditional_profile(&self) -> Option<&LangConditionalThresholdProfile> {
        let profile: &LangConditionalThresholdProfile = &XLMR_LANG_CONDITIONAL_PROFILE;
        Some(profile)
    }
}
/// Default XLM-R threshold profile: `Email`, `Phone` and `Address` at `1.1`.
///
/// The tests in this file describe a threshold above `1.0` as masking the label.
static XLMR_PROFILE: once_cell::sync::Lazy<ThresholdProfile> = once_cell::sync::Lazy::new(|| {
    let thresholds = [
        PrivacyLabel::Email,
        PrivacyLabel::Phone,
        PrivacyLabel::Address,
    ]
    .into_iter()
    .map(|label| (label, 1.1_f32))
    .collect();
    ThresholdProfile { thresholds }
});
static XLMR_PROFILE_FP_STRICT: once_cell::sync::Lazy<ThresholdProfile> =
once_cell::sync::Lazy::new(|| {
use std::collections::BTreeMap;
let mut t = BTreeMap::new();
t.insert(PrivacyLabel::Email, 1.1_f32);
t.insert(PrivacyLabel::Phone, 1.1_f32);
t.insert(PrivacyLabel::Address, 1.1_f32);
t.insert(PrivacyLabel::Person, 1.1_f32);
ThresholdProfile { thresholds: t }
});
/// Picks the XLM-R threshold profile from the `VIGIL_XLMR_PROFILE` environment variable.
///
/// Only the exact value `fp_strict` selects `XLMR_PROFILE_FP_STRICT`. An unset variable,
/// a value that is not valid Unicode, or any other string (including other spellings such
/// as `FP_STRICT`) selects `XLMR_PROFILE`.
///
/// The variable is read on every call; nothing is cached.
fn xlmr_profile_from_env() -> &'static ThresholdProfile {
    let requested = std::env::var("VIGIL_XLMR_PROFILE");
    if matches!(requested.as_deref(), Ok("fp_strict")) {
        &XLMR_PROFILE_FP_STRICT
    } else {
        &XLMR_PROFILE
    }
}
/// Descriptor for the model identified as `yonigo-pii-v1`.
#[derive(Debug)]
pub struct YonigoPiiDescriptor;

impl YonigoPiiDescriptor {
    /// Native label names exposed through `ModelDescriptor::id2label`.
    ///
    /// The order is part of the data and must not be changed. The group comments follow
    /// this descriptor's `canonical_mapping`.
    const NATIVE_LABELS: &'static [&'static str] = &[
        // -> Person
        "GIVENNAME1",
        "GIVENNAME2",
        "LASTNAME1",
        "LASTNAME2",
        "LASTNAME3",
        "TITLE",
        // -> Email
        "EMAIL",
        // -> Phone
        "TEL",
        // -> Address
        "BUILDING",
        "CITY",
        "COUNTRY",
        "GEOCOORD",
        "POSTCODE",
        "SECADDRESS",
        "STATE",
        "STREET",
        // -> Date
        "DATE",
        "TIME",
        "BOD",
        // -> AccountNumber
        "IDCARD",
        "PASSPORT",
        "DRIVERLICENSE",
        "SOCIALNUMBER",
        "CARDISSUER",
        // -> Url
        "IP",
        // -> Secret
        "PASS",
        // intentionally unmapped
        "SEX",
        "USERNAME",
    ];
}
impl ModelDescriptor for YonigoPiiDescriptor {
    fn model_id(&self) -> &str {
        "yonigo-pii-v1"
    }

    fn version(&self) -> &str {
        "1.0.0"
    }

    fn label_space_version(&self) -> &str {
        "8class-v1"
    }

    fn id2label(&self) -> &[&'static str] {
        Self::NATIVE_LABELS
    }

    /// Maps a native label (exact, case-sensitive match) onto its canonical label.
    ///
    /// `SEX` and `USERNAME` are deliberately unmapped; any unknown label is unmapped too.
    fn canonical_mapping(&self, native_label: &str) -> Option<PrivacyLabel> {
        let canonical = match native_label {
            "GIVENNAME1" | "GIVENNAME2" | "LASTNAME1" | "LASTNAME2" | "LASTNAME3" | "TITLE" => {
                PrivacyLabel::Person
            }
            "EMAIL" => PrivacyLabel::Email,
            "TEL" => PrivacyLabel::Phone,
            "BUILDING" | "CITY" | "COUNTRY" | "GEOCOORD" | "POSTCODE" | "SECADDRESS"
            | "STATE" | "STREET" => PrivacyLabel::Address,
            "DATE" | "TIME" | "BOD" => PrivacyLabel::Date,
            "IDCARD" | "PASSPORT" | "DRIVERLICENSE" | "SOCIALNUMBER" | "CARDISSUER" => {
                PrivacyLabel::AccountNumber
            }
            "IP" => PrivacyLabel::Url,
            "PASS" => PrivacyLabel::Secret,
            // Known native labels that have no canonical counterpart.
            "SEX" | "USERNAME" => return None,
            _ => return None,
        };
        Some(canonical)
    }

    fn tokenizer_spec(&self) -> TokenizerSpec {
        TokenizerSpec::HuggingFaceJson
    }

    fn post_processor(&self) -> PostProcessorKind {
        PostProcessorKind::Bio
    }

    fn onnx_filename(&self) -> &str {
        "model.onnx"
    }

    /// Always returns `YONIGO_PROFILE`.
    fn threshold_profile(&self) -> Option<&ThresholdProfile> {
        let profile: &ThresholdProfile = &YONIGO_PROFILE;
        Some(profile)
    }
}
/// Threshold profile for the yonigo descriptor: `Person`, `AccountNumber` and `Address`
/// at `1.1`.
///
/// The tests in this file describe a threshold above `1.0` as masking the label.
static YONIGO_PROFILE: once_cell::sync::Lazy<ThresholdProfile> = once_cell::sync::Lazy::new(|| {
    let thresholds = [
        PrivacyLabel::Person,
        PrivacyLabel::AccountNumber,
        PrivacyLabel::Address,
    ]
    .into_iter()
    .map(|label| (label, 1.1_f32))
    .collect();
    ThresholdProfile { thresholds }
});
/// Test helper: asserts that every native label of `descriptor` is either mapped by
/// `canonical_mapping` or explicitly listed in `expected_unmapped`.
///
/// # Panics
///
/// Panics, listing the offending labels, when some native label is neither mapped nor
/// listed as expected-unmapped.
#[cfg(test)]
pub(crate) fn assert_canonical_mapping_total<D: ModelDescriptor>(
    descriptor: &D,
    expected_unmapped: &[&str],
) {
    let missing: Vec<&str> = descriptor
        .id2label()
        .iter()
        .copied()
        .filter(|native| {
            descriptor.canonical_mapping(native).is_none() && !expected_unmapped.contains(native)
        })
        .collect();

    assert!(
        missing.is_empty(),
        "ModelDescriptor[{}] canonical_mapping 隐式遗漏 native labels: {:?}\n\
         (修复:在 canonical_mapping 加映射,或加入 expected_unmapped 显式忽略名单)",
        descriptor.model_id(),
        missing
    );
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;

    /// Environment variable that selects the XLM-R threshold profile.
    const PROFILE_ENV: &str = "VIGIL_XLMR_PROFILE";

    /// Serializes every test that writes `VIGIL_XLMR_PROFILE`; the process environment is
    /// shared by all test threads.
    static ENV_TEST_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// RAII guard for tests that modify `VIGIL_XLMR_PROFILE`.
    ///
    /// Acquiring it takes [`ENV_TEST_LOCK`] and snapshots the current value. Dropping it
    /// puts the snapshot back — also when the test body panics on a failed assertion, so a
    /// failing test can no longer leak a modified environment into the rest of the run.
    struct ProfileEnvGuard {
        /// Value found at acquisition time; `None` means the variable was unset.
        /// Kept as `OsString` so a non-Unicode value is restored rather than dropped.
        original: Option<std::ffi::OsString>,
        /// Held for the guard's whole lifetime. `Drop::drop` runs before fields are
        /// dropped, so the restore happens while the lock is still held.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl ProfileEnvGuard {
        /// Takes the lock (recovering it if an earlier test panicked while holding it) and
        /// records the current value of the variable.
        fn acquire() -> Self {
            let lock = ENV_TEST_LOCK.lock().unwrap_or_else(|e| e.into_inner());
            Self {
                original: std::env::var_os(PROFILE_ENV),
                _lock: lock,
            }
        }

        /// Sets the variable to `value` for the remainder of the test.
        fn set(&self, value: &str) {
            std::env::set_var(PROFILE_ENV, value);
        }

        /// Removes the variable for the remainder of the test.
        fn unset(&self) {
            std::env::remove_var(PROFILE_ENV);
        }
    }

    impl Drop for ProfileEnvGuard {
        fn drop(&mut self) {
            match self.original.take() {
                Some(value) => std::env::set_var(PROFILE_ENV, value),
                None => std::env::remove_var(PROFILE_ENV),
            }
        }
    }

    /// Distinct canonical labels reachable from the descriptor's native labels, in order
    /// of first appearance.
    fn covered_canonical_labels(descriptor: &dyn ModelDescriptor) -> Vec<PrivacyLabel> {
        let mut covered = Vec::new();
        for &native in descriptor.id2label() {
            if let Some(label) = descriptor.canonical_mapping(native) {
                if !covered.contains(&label) {
                    covered.push(label);
                }
            }
        }
        covered
    }

    #[test]
    fn descriptor_trait_compiles_and_basic_fields() {
        let d = OpenAIPrivacyFilterDescriptor;
        assert_eq!(d.model_id(), "openai-privacy-filter-v1");
        assert_eq!(d.version(), "1.0.0");
        assert_eq!(d.label_space_version(), "8class-v1");
        assert_eq!(d.tokenizer_spec(), TokenizerSpec::HuggingFaceJson);
        assert_eq!(d.post_processor(), PostProcessorKind::Bioes);
        assert!(
            d.threshold_profile().is_none(),
            "R1h+ verdict:OpenAI 不调 threshold(留 v0.6 baseline,推 v0.7-α6+ cross-engine)"
        );
    }

    #[test]
    fn openai_descriptor_canonical_mapping_total() {
        assert_canonical_mapping_total(&OpenAIPrivacyFilterDescriptor, &[]);
    }

    #[test]
    fn openai_descriptor_covers_all_8_canonical_labels() {
        let covered = covered_canonical_labels(&OpenAIPrivacyFilterDescriptor);
        for &expected in PrivacyLabel::ALL.iter() {
            assert!(
                covered.contains(&expected),
                "OpenAI descriptor 未覆盖 canonical label {:?}",
                expected
            );
        }
    }

    #[test]
    fn label_space_version_has_version_suffix() {
        let v = OpenAIPrivacyFilterDescriptor.label_space_version();
        assert!(
            v.contains("v1") || v.contains("v2") || v.contains("v3"),
            "label_space_version '{}' 应含 vN 后缀",
            v
        );
    }

    #[test]
    fn descriptor_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<OpenAIPrivacyFilterDescriptor>();
    }

    #[test]
    fn descriptor_is_dyn_compatible() {
        let d: Box<dyn ModelDescriptor> = Box::new(OpenAIPrivacyFilterDescriptor);
        assert_eq!(d.model_id(), "openai-privacy-filter-v1");
    }

    #[test]
    fn xlmr_descriptor_basic_fields() {
        let d = XlmrPiiDescriptor::default();
        assert_eq!(d.model_id(), "xlmr-pii-v1");
        assert_eq!(d.version(), "1.0.0");
        assert_eq!(d.label_space_version(), "8class-v1");
        assert_eq!(
            d.post_processor(),
            PostProcessorKind::Bio,
            "xlmr 是 BIO scheme(spike-1 实证)"
        );
    }

    #[test]
    fn xlmr_descriptor_canonical_mapping_total() {
        assert_canonical_mapping_total(&XlmrPiiDescriptor::default(), &["AGE", "GENDER", "SEX"]);
    }

    #[test]
    fn xlmr_descriptor_covers_6_of_8_canonical() {
        let covered = covered_canonical_labels(&XlmrPiiDescriptor::default());
        let expected_covered = [
            PrivacyLabel::Person,
            PrivacyLabel::Email,
            PrivacyLabel::Phone,
            PrivacyLabel::Address,
            PrivacyLabel::Date,
            PrivacyLabel::AccountNumber,
        ];
        for label in expected_covered {
            assert!(covered.contains(&label), "xlmr 应覆盖 {:?}", label);
        }
        assert!(
            !covered.contains(&PrivacyLabel::Url),
            "xlmr 不应直接覆盖 Url(Hard rules 兜底)"
        );
        assert!(
            !covered.contains(&PrivacyLabel::Secret),
            "xlmr 不应直接覆盖 Secret(Hard rules 兜底)"
        );
    }

    #[test]
    fn yonigo_descriptor_basic_fields() {
        let d = YonigoPiiDescriptor;
        assert_eq!(d.model_id(), "yonigo-pii-v1");
        assert_eq!(d.version(), "1.0.0");
        assert_eq!(d.label_space_version(), "8class-v1");
        assert_eq!(d.post_processor(), PostProcessorKind::Bio);
    }

    #[test]
    fn yonigo_descriptor_canonical_mapping_total() {
        assert_canonical_mapping_total(&YonigoPiiDescriptor, &["SEX", "USERNAME"]);
    }

    #[test]
    fn yonigo_descriptor_covers_all_8_canonical_via_ip_pass() {
        let covered = covered_canonical_labels(&YonigoPiiDescriptor);
        for label in PrivacyLabel::ALL {
            assert!(
                covered.contains(&label),
                "yonigo 应覆盖 canonical {:?}(IP→Url, PASS→Secret 是 yonigo 独家)",
                label
            );
        }
    }

    #[test]
    fn three_descriptors_dyn_compatible_collection() {
        let descriptors: Vec<Box<dyn ModelDescriptor>> = vec![
            Box::new(OpenAIPrivacyFilterDescriptor),
            Box::new(XlmrPiiDescriptor::default()),
            Box::new(YonigoPiiDescriptor),
        ];
        let ids: Vec<&str> = descriptors.iter().map(|d| d.model_id()).collect();
        assert_eq!(ids.len(), 3);
        let mut sorted = ids.clone();
        sorted.sort();
        sorted.dedup();
        assert_eq!(sorted.len(), 3, "三 descriptor model_id 必互异");
    }

    #[test]
    fn all_descriptors_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<OpenAIPrivacyFilterDescriptor>();
        assert_send_sync::<XlmrPiiDescriptor>();
        assert_send_sync::<YonigoPiiDescriptor>();
    }

    #[test]
    fn post_processor_bio_variant_visible() {
        let openai = OpenAIPrivacyFilterDescriptor.post_processor();
        let xlmr = XlmrPiiDescriptor::default().post_processor();
        let yonigo = YonigoPiiDescriptor.post_processor();
        assert!(matches!(openai, PostProcessorKind::Bioes));
        assert!(matches!(xlmr, PostProcessorKind::Bio));
        assert!(matches!(yonigo, PostProcessorKind::Bio));
    }

    #[test]
    fn descriptors_onnx_filename_match_layout() {
        assert_eq!(
            OpenAIPrivacyFilterDescriptor.onnx_filename(),
            "model_q4f16.onnx",
            "OpenAI default 路径"
        );
        assert_eq!(
            XlmrPiiDescriptor::default().onnx_filename(),
            "onnx/model_q4f16.onnx",
            "xlmr 在 onnx/ 子目录(onnx-community/multilang-pii-ner-ONNX 仓库布局)"
        );
        assert_eq!(
            YonigoPiiDescriptor.onnx_filename(),
            "model.onnx",
            "yonigo optimum-cli 导出无 q4f16 后缀"
        );
    }

    #[test]
    fn descriptors_onnx_filename_relative_path_invariant() {
        for d in [
            &OpenAIPrivacyFilterDescriptor as &dyn ModelDescriptor,
            &XlmrPiiDescriptor::default(),
            &YonigoPiiDescriptor,
        ] {
            let f = d.onnx_filename();
            assert!(
                !f.starts_with('/'),
                "onnx_filename 必相对(不带 leading /),实际 '{}' for {}",
                f,
                d.model_id()
            );
            assert!(f.ends_with(".onnx"), "应以 .onnx 结尾: '{}'", f);
        }
    }

    // Reads the environment through the default descriptor without taking the env guard:
    // the assertions below hold for both profiles the environment can select.
    #[test]
    fn xlmr_threshold_profile_masks_high_fp_labels() {
        let d = XlmrPiiDescriptor::default();
        let profile = d.threshold_profile().expect("xlmr 应有 threshold profile");
        for label in [
            PrivacyLabel::Email,
            PrivacyLabel::Phone,
            PrivacyLabel::Address,
        ] {
            let t = profile.thresholds.get(&label).copied().unwrap_or(0.0_f32);
            assert!(
                t > 1.0,
                "xlmr {:?} threshold 应 > 1.0(屏蔽),实际 {}",
                label,
                t
            );
        }
        assert!(!profile.thresholds.contains_key(&PrivacyLabel::Date));
    }

    #[test]
    fn yonigo_threshold_profile_masks_high_fp_labels() {
        let d = YonigoPiiDescriptor;
        let profile = d
            .threshold_profile()
            .expect("yonigo 应有 threshold profile");
        for label in [
            PrivacyLabel::Person,
            PrivacyLabel::AccountNumber,
            PrivacyLabel::Address,
        ] {
            let t = profile.thresholds.get(&label).copied().unwrap_or(0.0_f32);
            assert!(t > 1.0, "yonigo {:?} threshold 应 > 1.0,实际 {}", label, t);
        }
        assert!(!profile.thresholds.contains_key(&PrivacyLabel::Email));
        assert!(!profile.thresholds.contains_key(&PrivacyLabel::Phone));
    }

    #[test]
    fn openai_no_threshold_profile_r1h_plus_verdict() {
        assert!(
            OpenAIPrivacyFilterDescriptor.threshold_profile().is_none(),
            "R1h+ 实验后 OpenAI 撤回所有 threshold(conf-only filter 路径终止)"
        );
    }

    #[test]
    fn xlmr_default_profile_blocks_email_phone_address_only() {
        let profile = &*XLMR_PROFILE;
        for lbl in [
            PrivacyLabel::Email,
            PrivacyLabel::Phone,
            PrivacyLabel::Address,
        ] {
            let t = profile.thresholds.get(&lbl).copied().unwrap_or(0.0);
            assert!(t > 1.0, "xlmr {:?} 应屏蔽 (> 1.0),实际 {}", lbl, t);
        }
        assert!(
            !profile.thresholds.contains_key(&PrivacyLabel::Person),
            "xlmr Person 不应在 default profile — v0.9 Sprint 0 P0 已把 1.1 包成 opt-in fp_strict;\
             default path 保持 v0.8 baseline(EU recall 0.904)"
        );
    }

    #[test]
    fn xlmr_fp_strict_profile_is_superset_of_default_plus_person() {
        let default_profile = &*XLMR_PROFILE;
        let fp_strict_profile = &*XLMR_PROFILE_FP_STRICT;
        for (lbl, default_t) in &default_profile.thresholds {
            let strict_t = fp_strict_profile
                .thresholds
                .get(lbl)
                .copied()
                .unwrap_or(0.0);
            assert!(
                (strict_t - *default_t).abs() < 1e-6,
                "fp_strict 必须包含 default 的 {:?}(threshold {} 应等于 default {})",
                lbl,
                strict_t,
                default_t
            );
        }
        let person_t = fp_strict_profile
            .thresholds
            .get(&PrivacyLabel::Person)
            .copied()
            .unwrap_or(0.0);
        assert!(
            person_t > 1.0,
            "fp_strict Person 应屏蔽 (> 1.0,实际 {})",
            person_t
        );
        assert_eq!(
            fp_strict_profile.thresholds.len(),
            default_profile.thresholds.len() + 1,
            "fp_strict label 数应 = default + 1(加 Person)"
        );
    }

    #[test]
    fn xlmr_profile_from_env_select_and_fallback() {
        let env = ProfileEnvGuard::acquire();

        env.unset();
        let p = xlmr_profile_from_env();
        assert!(
            !p.thresholds.contains_key(&PrivacyLabel::Person),
            "unset env → default(Person 不屏蔽)"
        );

        env.set("fp_strict");
        let p = xlmr_profile_from_env();
        let person_t = p
            .thresholds
            .get(&PrivacyLabel::Person)
            .copied()
            .unwrap_or(0.0);
        assert!(
            person_t > 1.0,
            "fp_strict env → fp_strict profile(Person 屏蔽,实际 {})",
            person_t
        );

        for unknown in ["strict", "FP_STRICT", "garbage", ""] {
            env.set(unknown);
            let p = xlmr_profile_from_env();
            assert!(
                !p.thresholds.contains_key(&PrivacyLabel::Person),
                "unknown env value {:?} 应 fallback default(Person 不屏蔽)",
                unknown
            );
        }
    }

    #[test]
    fn lang_conditional_builder_basic() {
        let default = ThresholdProfile {
            thresholds: [
                (PrivacyLabel::Email, 1.1_f32),
                (PrivacyLabel::Phone, 1.1_f32),
            ]
            .into_iter()
            .collect(),
        };
        let p = LangConditionalThresholdProfile::new(default)
            .with_override("de", PrivacyLabel::Person, 1.1_f32)
            .with_override("it", PrivacyLabel::AccountNumber, 1.1_f32);
        assert_eq!(p.overrides.len(), 2);
        assert_eq!(
            p.overrides.get(&("de".to_string(), PrivacyLabel::Person)),
            Some(&1.1_f32)
        );
    }

    #[test]
    fn lang_conditional_threshold_for_priority() {
        let default = ThresholdProfile {
            thresholds: [(PrivacyLabel::Email, 0.5_f32)].into_iter().collect(),
        };
        let p = LangConditionalThresholdProfile::new(default).with_override(
            "de",
            PrivacyLabel::Person,
            1.1_f32,
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Person, Some("de")),
            Some(1.1_f32),
            "de × Person 命中 override"
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Email, Some("de")),
            Some(0.5_f32),
            "de 但 Email 未在 overrides → default"
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Person, Some("en")),
            None,
            "en × Person 既非 override 也非 default → None"
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Email, None),
            Some(0.5_f32),
            "lang None + Email 在 default → default"
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Address, None),
            None,
            "Address 既非 override 也非 default → None"
        );
    }

    #[test]
    fn xlmr_lang_conditional_profile_top_6_overrides() {
        let p = &*XLMR_LANG_CONDITIONAL_PROFILE;
        let candidates: &[(&str, PrivacyLabel)] = &[
            ("it", PrivacyLabel::AccountNumber),
            ("de", PrivacyLabel::Person),
            ("fr", PrivacyLabel::AccountNumber),
            ("de", PrivacyLabel::AccountNumber),
            ("en", PrivacyLabel::Person),
            ("zh", PrivacyLabel::AccountNumber),
        ];
        for (lang, label) in candidates {
            assert_eq!(
                p.threshold_for(*label, Some(lang)),
                Some(1.1_f32),
                "{:?} × {:?} 应在 overrides(spike candidates top 6)",
                lang,
                label
            );
        }
        assert_eq!(p.overrides.len(), candidates.len(), "top 6 候选完整");
        for label in [
            PrivacyLabel::Email,
            PrivacyLabel::Phone,
            PrivacyLabel::Address,
        ] {
            let t = p.default.thresholds.get(&label).copied().unwrap_or(0.0);
            assert!(
                t > 1.0,
                "default {:?} 应继承 XLMR_PROFILE 屏蔽(>1.0,实际 {})",
                label,
                t
            );
        }
        assert!(
            !p.default.thresholds.contains_key(&PrivacyLabel::Person),
            "default Person 不应屏蔽(v0.8 baseline);仅 lang-conditional 启用"
        );
    }

    #[test]
    fn xlmr_default_path_unchanged_by_p1_1() {
        let env = ProfileEnvGuard::acquire();
        env.unset();
        let d = XlmrPiiDescriptor::default();
        let profile = d.threshold_profile().expect("xlmr profile");
        assert!(
            !profile.thresholds.contains_key(&PrivacyLabel::Person),
            "v0.8 default path 必须保持 Person 不屏蔽(P1.1 不接 lang-conditional 路径)"
        );
    }

    #[test]
    fn lang_override_wins_even_if_weaker_than_default() {
        let mut default_thresholds = std::collections::BTreeMap::new();
        default_thresholds.insert(PrivacyLabel::Person, 0.8_f32);
        let default = ThresholdProfile {
            thresholds: default_thresholds,
        };
        let p = LangConditionalThresholdProfile::new(default).with_override(
            "de",
            PrivacyLabel::Person,
            0.0_f32,
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Person, Some("de")),
            Some(0.0_f32),
            "lang override 必须优先于 default,即使数值弱(caller 显式决策语义)"
        );
        assert_eq!(
            p.threshold_for(PrivacyLabel::Person, None),
            Some(0.8_f32),
            "lang None → fallback default(本测试同时验 None 路径)"
        );
    }

    #[test]
    fn xlmr_default_descriptor_legacy_env_driven_path() {
        let env = ProfileEnvGuard::acquire();
        env.unset();
        let d = XlmrPiiDescriptor::default();
        let profile = d.threshold_profile().expect("xlmr profile");
        assert!(
            !profile.thresholds.contains_key(&PrivacyLabel::Person),
            "default descriptor + env unset → default profile(Person 不屏蔽)"
        );
    }

    #[test]
    fn xlmr_default_descriptor_legacy_env_fp_strict() {
        let env = ProfileEnvGuard::acquire();
        env.set("fp_strict");
        let d = XlmrPiiDescriptor::default();
        let profile = d.threshold_profile().expect("xlmr profile");
        assert!(
            profile.thresholds.contains_key(&PrivacyLabel::Person),
            "default descriptor + env=fp_strict → fp_strict profile(Person 屏蔽)"
        );
    }

    #[test]
    fn xlmr_typed_default_mode_ignores_env() {
        let env = ProfileEnvGuard::acquire();
        env.set("fp_strict");
        let d = XlmrPiiDescriptor::with_mode(XlmrProfileMode::Default);
        let profile = d.threshold_profile().expect("xlmr profile");
        assert!(
            !profile.thresholds.contains_key(&PrivacyLabel::Person),
            "typed Default 必须忽略 env=fp_strict;Person 不屏蔽"
        );
        assert!(
            profile.thresholds.contains_key(&PrivacyLabel::Email),
            "typed Default 必须含 Email 屏蔽(v0.8 baseline)"
        );
    }

    #[test]
    fn xlmr_typed_fp_strict_mode_ignores_env() {
        let env = ProfileEnvGuard::acquire();
        env.unset();
        let d = XlmrPiiDescriptor::with_mode(XlmrProfileMode::FpStrict);
        let profile = d.threshold_profile().expect("xlmr profile");
        let person_t = profile
            .thresholds
            .get(&PrivacyLabel::Person)
            .copied()
            .unwrap_or(0.0);
        assert!(
            person_t > 1.0,
            "typed FpStrict 必须屏蔽 Person (>1.0,实际 {})",
            person_t
        );
    }

    // The wildcard arm is unreachable inside the defining crate; it is kept to show the
    // match shape that `#[non_exhaustive]` requires of downstream crates.
    #[test]
    #[allow(unreachable_patterns)]
    fn xlmr_profile_mode_non_exhaustive_match_compiles() {
        let mode = XlmrProfileMode::Default;
        let label = match mode {
            XlmrProfileMode::Default => "default",
            XlmrProfileMode::FpStrict => "fp_strict",
            _ => "unknown_future",
        };
        assert_eq!(label, "default");
    }

    #[test]
    fn lang_conditional_default_independent_of_env() {
        let env = ProfileEnvGuard::acquire();
        env.set("fp_strict");
        let env_profile = xlmr_profile_from_env();
        assert!(
            env_profile.thresholds.contains_key(&PrivacyLabel::Person),
            "env=fp_strict 时 threshold_profile 路径应含 Person"
        );
        let lang_default = &XLMR_LANG_CONDITIONAL_PROFILE.default;
        assert!(
            !lang_default.thresholds.contains_key(&PrivacyLabel::Person),
            "lang-conditional default 字段独立于 env(不含 Person);env 切换仅影响 threshold_profile() 路径"
        );
    }

    #[test]
    fn descriptor_default_onnx_filename_fallback() {
        /// Implements only the required methods, so every provided method is the trait default.
        struct NoOverrideDescriptor;
        impl ModelDescriptor for NoOverrideDescriptor {
            fn model_id(&self) -> &str {
                "test-no-override"
            }
            fn version(&self) -> &str {
                "0.0.0"
            }
            fn label_space_version(&self) -> &str {
                "test-v0"
            }
            fn id2label(&self) -> &[&'static str] {
                &[]
            }
            fn canonical_mapping(&self, _: &str) -> Option<PrivacyLabel> {
                None
            }
            fn tokenizer_spec(&self) -> TokenizerSpec {
                TokenizerSpec::HuggingFaceJson
            }
            fn post_processor(&self) -> PostProcessorKind {
                PostProcessorKind::Bio
            }
        }
        assert_eq!(
            NoOverrideDescriptor.onnx_filename(),
            "model_q4f16.onnx",
            "default 实现应返 model_q4f16.onnx"
        );
    }
}