vigil_redaction/
ensemble.rs

1//! v0.7-α3 Phase 3 S3(E6a) — Ensemble layer:多模型 union + IoU dedup。
2//!
3//! 把多个 [`crate::engine::RedactionEngine`] 实例并联,产出合并的 model findings。
4//! 算法移植自 `scripts/spike-p3/bench_ensemble.py::ensemble_merge`(Python POC 已
5//! 验证 EU recall 0.895 vs OpenAI 单模型 0.75-0.85)。
6//!
7//! # 设计纪律
8//!
9//! - **职责分离**:ensemble 只融合 model 侧(N 个 RedactionEngine),Hard rules 由
10//!   外层 [`crate::scan::scan_text_with_engine`] 在 `merge_findings(hard, model)`
11//!   末段统一 merge。EnsembleEngine **不**在内部跑 hard regex,避免与 ADR 0013 D1
12//!   Hard 优先决策双 source。
13//! - **canonical-label IoU dedup**:同 canonical kind 重叠(IoU ≥ 0.5)→ 取 longer
14//!   span;不同 canonical 重叠 → 都保留(留给 caller 审计层决策)。
15//! - **失败语义**:任一引擎返 Err → propagate(整 ensemble 失败,fail-closed);
16//!   单引擎成功 + 单引擎失败的 graceful degrade 推 v0.7-α4(本 S3 保严格)。
17//! - **顺序**:engines 按构造顺序串行调用;同 IoU 时**先到先记**,后来者只在更长
18//!   span 时替换(spike-3 Python 同语义)。
19//!
20//! # 不变量保留
21//!
22//! - 返回 findings 仍是 [`Finding`] 类型(同 `RedactionEngine.infer` 契约)
23//! - `risk_delta` 由 caller 在 `scan_text_with_engine` 重新补值(C-7 决议;
24//!   ensemble 层不依赖 risk 表)
25//! - confidence 取 ensemble 内**首个**命中 finding 的(简化策略;merge 优先级
26//!   不参与 confidence 决策)
27
28use std::sync::Arc;
29
30use crate::engine::{EngineError, RedactionEngine};
31use crate::merge::Finding;
32
/// IoU overlap threshold: two spans whose IoU is at or above this value are treated as
/// the same region when merging. Per the original note, 0.5 is the customary value in
/// the NER domain and matches the spike-3 Python POC.
const IOU_THRESHOLD: f64 = 0.5;
35
/// Union ensemble over several redaction engines.
///
/// **Typical usage** (the original note mentions wiring this to the firewall config in a
/// later step, S4):
///
/// ```ignore
/// use std::sync::Arc;
/// use vigil_redaction::engine::{NoopEngine, RedactionEngine};
/// use vigil_redaction::ensemble::EnsembleEngine;
///
/// let engines: Vec<Arc<dyn RedactionEngine>> = vec![
///     Arc::new(NoopEngine),
///     Arc::new(NoopEngine),
/// ];
/// let ens = EnsembleEngine::new(engines);
/// let _findings = ens.infer("hello").unwrap();
/// ```
pub struct EnsembleEngine {
    engines: Vec<Arc<dyn RedactionEngine>>,
    /// Opt-in "dual confirm" label set (v0.7-α5 A step, E6a).
    /// A finding whose canonical label is in this set is kept only when at least two
    /// different engines reported it within the same IoU cluster; a report from a single
    /// engine is dropped. The original note targets high false-positive labels such as
    /// Address / Date / AccountNumber.
    /// Empty by default, which disables the check and leaves the plain union behavior.
    dual_confirm_labels: std::collections::BTreeSet<crate::label::PrivacyLabel>,
    /// Caller-supplied model ids, index-aligned with `engines` (v0.8 Sprint 3 P2.0).
    /// `infer_with_attribution` uses them to name the engines behind each finding, for
    /// bench / diagnose tooling. When empty, attribution falls back to `"unknown-N"`.
    model_ids: Vec<String>,
}
64
65// 手动 impl Debug:`Vec<Arc<dyn RedactionEngine>>` 不能 derive(trait object 无 Debug
66// supertrait,加 supertrait 是 breaking change for external implementer)。手写实现
67// 暴露结构性信息(engine 数量 + dual_confirm 集 + model_ids),不暴露 engine 内部状态。
68impl std::fmt::Debug for EnsembleEngine {
69    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70        f.debug_struct("EnsembleEngine")
71            .field("engine_count", &self.engines.len())
72            .field("dual_confirm_labels", &self.dual_confirm_labels)
73            .field("model_ids", &self.model_ids)
74            .finish_non_exhaustive()
75    }
76}
77
/// Internal bookkeeping: a finding tagged with the index of the engine that produced it.
/// Kept private so it is not part of the public API surface (avoids a SemVer commitment).
/// The merge functions use `engine_idx` to count how many distinct engines agree within
/// one IoU cluster (dual-confirm check and attribution).
struct PerEngineFinding {
    engine_idx: usize,
    finding: Finding,
}
84
/// Cross-engine attribution for a single merged finding (v0.8 Sprint 3 P2.0).
///
/// [`EnsembleEngine::infer_with_attribution`] returns these in a vector parallel to
/// `findings: Vec<Finding>`: `finding_index` is the position in that findings vector and
/// `contributing_engines` names the engines whose findings fell into the same IoU
/// cluster (sorted).
///
/// **Data-driven dual_confirm calibration** (the main Sprint 3 use case, per the
/// original note):
/// - cluster size = 1 → reported by a single engine; possibly a label that one engine
///   dominates
/// - cluster size ≥ 2 → multi-engine consensus, a stronger signal against false positives
/// - the P1.2 `diagnose_per_label.py` tool can derive a per-engine TP/FP matrix and the
///   per-label contribution rate of N-engine consensus from this data
///
/// **SemVer (R1 MUST-FIX, Codex 019deb45)**: `#[non_exhaustive]` forces callers that
/// pattern-match to include `..`, so fields such as `cluster_id` / score / span_source
/// can be added later without a breaking change. Callers can still read the public
/// fields (`attr.finding_index` / `attr.contributing_engines`), but **cannot** build the
/// struct with a literal outside this crate; instances come from
/// `EnsembleEngine::infer_with_attribution`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct EngineAttribution {
    /// Position of the finding inside the `findings` vector returned by
    /// [`EnsembleEngine::infer_with_attribution`].
    pub finding_index: usize,
    /// Model ids of the distinct engines that contributed to the finding's IoU cluster,
    /// sorted lexicographically for stable output.
    /// When the caller supplied no ids via [`EnsembleEngine::with_model_ids`], each entry
    /// defaults to `"unknown-{idx}"`.
    pub contributing_engines: Vec<String>,
}
111
112impl EnsembleEngine {
113    /// 构造 ensemble:`engines` 顺序决定 IoU dedup 时的"first-come"优先(后来者
114    /// 只在 span 更长时替换 — 与 spike-3 Python POC 同语义)。
115    pub fn new(engines: Vec<Arc<dyn RedactionEngine>>) -> Self {
116        Self {
117            engines,
118            dual_confirm_labels: std::collections::BTreeSet::new(),
119            model_ids: Vec::new(),
120        }
121    }
122
123    /// v0.8 Sprint 3 P2.0 — 提供 model_id 数组(下标对应 engines vec)。
124    ///
125    /// `infer_with_attribution` 用此返 `EngineAttribution.contributing_engines`
126    /// 含真实 model_id;若不调此 builder,attribution 返 `"unknown-{idx}"`。
127    /// 长度不一致(`ids.len() != engines.len()`)→ panic(配置错误,fail-fast)。
128    pub fn with_model_ids<I, S>(mut self, ids: I) -> Self
129    where
130        I: IntoIterator<Item = S>,
131        S: Into<String>,
132    {
133        let collected: Vec<String> = ids.into_iter().map(Into::into).collect();
134        assert_eq!(
135            collected.len(),
136            self.engines.len(),
137            "EnsembleEngine::with_model_ids:ids.len()={} 与 engines.len()={} 不匹配",
138            collected.len(),
139            self.engines.len()
140        );
141        self.model_ids = collected;
142        self
143    }
144
145    /// v0.7-α5 A step — 启用 cross-engine 双确认 for 指定 canonical label。
146    ///
147    /// 在 `labels` 集合内的 canonical label,EnsembleEngine.infer 仅保留**有 ≥ 2
148    /// 不同 engine** 同 IoU ≥ 0.5 区域共识的 finding;单 engine 报即丢。
149    /// 不在集合的 label 走原 union + IoU dedup(R1h 行为不变)。
150    ///
151    /// **典型用法**(Codex 推荐 high-FP 类):
152    /// ```ignore
153    /// let ensemble = EnsembleEngine::new(engines)
154    ///     .with_dual_confirm([PrivacyLabel::Address, PrivacyLabel::Date,
155    ///                         PrivacyLabel::AccountNumber]);
156    /// ```
157    pub fn with_dual_confirm<I>(mut self, labels: I) -> Self
158    where
159        I: IntoIterator<Item = crate::label::PrivacyLabel>,
160    {
161        self.dual_confirm_labels = labels.into_iter().collect();
162        self
163    }
164
165    /// 引擎数量(诊断 / 配置展示用)
166    pub fn engine_count(&self) -> usize {
167        self.engines.len()
168    }
169
170    /// v0.8 Sprint 3 P2.0 — ensemble 推理 + per-finding cross-engine attribution。
171    ///
172    /// **与 [`Self::infer`] 区别**:平行返 `Vec<EngineAttribution>`,每元素描述
173    /// 同位 finding 的 IoU cluster 内 distinct 贡献 engine 集合。**findings 顺序与
174    /// attributions 顺序严格一致**(并列下标);`infer()` 仍走原 union dedup 路径。
175    ///
176    /// **算法**:复用 [`ensemble_merge_with_dual_confirm`] 内部 cluster 逻辑,
177    /// 但保留每 cluster 的 distinct engine_idx 集合 + 经 `model_ids` 转字符串。
178    ///
179    /// **caller 用例**:
180    /// - bench / diagnose:per-engine × per-label TP/FP 真矩阵(Sprint 3 dual_confirm
181    ///   校准必备)
182    /// - 审计 cross-trace:某 finding 由哪些 engine 共识产出(供 ADR 0017 evolution)
183    ///
184    /// **v0.9 Sprint 1 P1.3 NICE(Codex 019e03b7)+ v0.10 Sprint 3 兑付**:
185    /// 本方法走 baseline path(`infer_with_attribution_with_lang(text, None)`
186    /// 委托);如需 lang-aware attribution 用 [`Self::infer_with_attribution_with_lang`]
187    /// (v0.10 Sprint 3 加,兑付 P1.3 R1 NICE)。
188    pub fn infer_with_attribution(
189        &self,
190        text: &str,
191    ) -> Result<(Vec<Finding>, Vec<EngineAttribution>), EngineError> {
192        // **v0.10 Sprint 3 fix**:legacy 路径 → infer_with_attribution_with_lang(text, None)
193        // (lang None 等价 v0.9 行为;子 engine 走 default threshold_profile)
194        self.infer_with_attribution_with_lang(text, None)
195    }
196
197    /// **v0.10 Sprint 3** — ensemble 推理 + per-finding cross-engine attribution +
198    /// **lang 透传**(P1.3 R1 NICE 兑付,Codex `019e03b7`)。
199    ///
200    /// 与 [`Self::infer_with_attribution`] 区别:接 `lang: Option<&str>`
201    /// 参数,内部循环调用 `engine.infer_with_lang(text, lang)?` 透传到子 engine
202    /// (如 OrtEngine 走 lang-conditional threshold)。
203    ///
204    /// **解决问题**:v0.9 P1.3 实测发现 `infer_with_attribution` 不接 lang 致
205    /// BENCH_OUT JSON 在 lang_aware 模式下 attribution 与主路径数据不一致
206    /// (主 result EU FP 37 / attribution 路径仍 baseline EU FP 59)。本方法
207    /// 让 attribution 与主路径口径一致,diagnose 工具消费 lang_aware bench JSON
208    /// 时得到真 lang-conditional per-engine 矩阵。
209    ///
210    /// **lang 规范**:同 `infer_with_lang` — case-sensitive ISO 639-1 lowercase;
211    /// `None` 等价 `infer_with_attribution`(baseline 行为)。
212    pub fn infer_with_attribution_with_lang(
213        &self,
214        text: &str,
215        lang: Option<&str>,
216    ) -> Result<(Vec<Finding>, Vec<EngineAttribution>), EngineError> {
217        let mut per_engine: Vec<PerEngineFinding> = Vec::new();
218        for (idx, engine) in self.engines.iter().enumerate() {
219            // v0.10 Sprint 3:用 infer_with_lang 透传 lang(对齐主路径)
220            let f = engine.infer_with_lang(text, lang)?;
221            for finding in f {
222                per_engine.push(PerEngineFinding {
223                    engine_idx: idx,
224                    finding,
225                });
226            }
227        }
228
229        // cluster 化(与 ensemble_merge_with_dual_confirm 同口径,保 attribution)
230        let (findings, attrs) =
231            ensemble_merge_with_attribution(per_engine, &self.dual_confirm_labels, &self.model_ids);
232        Ok((findings, attrs))
233    }
234}
235
236impl RedactionEngine for EnsembleEngine {
237    fn infer(&self, text: &str) -> Result<Vec<Finding>, EngineError> {
238        // **v0.9 Sprint 1 P1.3 fix**:legacy 路径委托 infer_with_lang(text, None)
239        // (lang None 等价 v0.8 行为;子 engine 走 default threshold_profile)
240        self.infer_with_lang(text, None)
241    }
242
243    /// **v0.9 Sprint 1 P1.3 fix**(关键 bug 修复)— EnsembleEngine 必须 override
244    /// `infer_with_lang` 才能把 lang 透传到每个子 engine(如 OrtEngine 内部用
245    /// lang 查 LangConditionalThresholdProfile 命中 (lang, label) override)。
246    ///
247    /// 历史 bug:trait default `infer_with_lang(text, _lang)` 委托 `self.infer(text)`,
248    /// EnsembleEngine 不 override → ensemble 子 engine 收不到 lang → lang_conditional
249    /// override 永远 miss(remote 实测 baseline / lang_aware 数据完全相同坐实 bug)。
250    fn infer_with_lang(&self, text: &str, lang: Option<&str>) -> Result<Vec<Finding>, EngineError> {
251        let mut per_engine: Vec<PerEngineFinding> = Vec::new();
252        for (idx, engine) in self.engines.iter().enumerate() {
253            // **关键**:用 infer_with_lang 透传 lang(若 engine 不 override 等价 infer)
254            let f = engine.infer_with_lang(text, lang)?;
255            for finding in f {
256                per_engine.push(PerEngineFinding {
257                    engine_idx: idx,
258                    finding,
259                });
260            }
261        }
262        Ok(ensemble_merge_with_dual_confirm(
263            per_engine,
264            &self.dual_confirm_labels,
265        ))
266    }
267}
268
/// Intersection-over-union of two half-open intervals `[start, end)`.
///
/// Returns `0.0` when the intervals do not overlap (touching or empty intervals
/// included); otherwise the overlap length divided by the length of the span from the
/// smaller start to the larger end. The original note states the spans are byte offsets
/// and that this matches the implementation in [`crate::scan`] and the spike-3 POC.
fn iou(a: (usize, usize), b: (usize, usize)) -> f64 {
    let (lo, hi) = (a.0.max(b.0), a.1.min(b.1));
    if hi <= lo {
        return 0.0;
    }
    let overlap = hi - lo;
    // With a non-empty overlap the covering span is exactly the union of the two.
    let hull = a.1.max(b.1) - a.0.min(b.0);
    match hull {
        0 => 0.0,
        _ => overlap as f64 / hull as f64,
    }
}
286
287/// 合并多个引擎 findings:同 kind IoU ≥ 0.5 → 取 longer span;否则都保留。
288///
289/// **顺序保证**:输出按 `span.start` 升序(沿用 [`crate::merge::merge_findings`]
290/// 同口径,便于审计 / UI 渲染)。
291///
292/// 与 [`ensemble_merge_with_dual_confirm`] 的区别:本函数走纯 union(R1h 行为);
293/// dual_confirm 集合空 = 等价此函数。保留独立函数便于守门测试。
294#[allow(dead_code)] // 留 backward-compat 测试守门(_unconditional 仍走原 union)
295fn ensemble_merge(all: Vec<Finding>) -> Vec<Finding> {
296    let mut finals: Vec<Finding> = Vec::new();
297    for f in all {
298        let mut absorbed = false;
299        for slot in finals.iter_mut() {
300            if slot.kind == f.kind && iou(slot.span, f.span) >= IOU_THRESHOLD {
301                let cur_len = f.span.1.saturating_sub(f.span.0);
302                let exist_len = slot.span.1.saturating_sub(slot.span.0);
303                if cur_len > exist_len {
304                    *slot = f.clone();
305                }
306                absorbed = true;
307                break;
308            }
309        }
310        if !absorbed {
311            finals.push(f);
312        }
313    }
314    finals.sort_by_key(|f| f.span.0);
315    finals
316}
317
318/// v0.7-α5 A step — 带 dual_confirm 的 ensemble merge。
319///
320/// **算法**:
321/// 1. 按 `(canonical_label, span IoU ≥ 0.5)` 分组所有 PerEngineFinding 到 cluster
322/// 2. 每 cluster 收集 distinct `engine_idx` set
323/// 3. **dual_confirm 集合内 label**:cluster 必须含 ≥ 2 不同 engine_idx 才保留
324///    (单 engine 报丢);保留 longest span finding
325/// 4. **非 dual_confirm label**:cluster 任意 engine 共识即保留 longest span(union)
326/// 5. 不同 cluster 间 finding 都保留(只内部 dedup)
327/// 6. 输出按 `span.start` 升序
328///
329/// **canonical label 路由**:Finding.kind 经 `PrivacyLabel::from_kind` 路由到
330/// canonical;dual_confirm 检查走 canonical 维度(防 native label 漂移)。
331fn ensemble_merge_with_dual_confirm(
332    per_engine: Vec<PerEngineFinding>,
333    dual_confirm: &std::collections::BTreeSet<crate::label::PrivacyLabel>,
334) -> Vec<Finding> {
335    use crate::label::PrivacyLabel;
336
337    // Cluster: (canonical_label, [PerEngineFinding 同 IoU ≥ 0.5 区域])
338    let mut clusters: Vec<(Option<PrivacyLabel>, Vec<PerEngineFinding>)> = Vec::new();
339    for pf in per_engine {
340        let canonical = PrivacyLabel::from_kind(pf.finding.kind);
341        // 找匹配 cluster idx(避免 mutable borrow 与 move 冲突)
342        let target_idx = clusters.iter().position(|(existing_label, group)| {
343            *existing_label == canonical
344                && group
345                    .iter()
346                    .any(|g| iou(g.finding.span, pf.finding.span) >= IOU_THRESHOLD)
347        });
348        match target_idx {
349            Some(idx) => clusters[idx].1.push(pf),
350            None => clusters.push((canonical, vec![pf])),
351        }
352    }
353
354    // 每 cluster 应用 dual_confirm 规则 + 选 longest span
355    let mut finals: Vec<Finding> = Vec::new();
356    for (canonical, group) in clusters {
357        // 检查 dual_confirm 要求
358        if let Some(label) = canonical {
359            if dual_confirm.contains(&label) {
360                let distinct: std::collections::BTreeSet<usize> =
361                    group.iter().map(|p| p.engine_idx).collect();
362                if distinct.len() < 2 {
363                    // dual_confirm label 但仅 1 engine 报 — 丢弃整 cluster(降 FP)
364                    continue;
365                }
366            }
367        }
368        // 选 longest span
369        if let Some(longest) = group
370            .into_iter()
371            .map(|p| p.finding)
372            .max_by_key(|f| f.span.1.saturating_sub(f.span.0))
373        {
374            finals.push(longest);
375        }
376    }
377
378    finals.sort_by_key(|f| f.span.0);
379    finals
380}
381
382/// v0.8 Sprint 3 P2.0 — `ensemble_merge_with_dual_confirm` 的 attribution-保留变种。
383///
384/// 与 `_with_dual_confirm` 同算法(同 cluster 化 + dual_confirm 检查 + longest span),
385/// 但在 cluster 阶段额外记 distinct engine_idx,最终输出 `EngineAttribution` 数组,
386/// 与 findings 数组下标严格对齐。
387fn ensemble_merge_with_attribution(
388    per_engine: Vec<PerEngineFinding>,
389    dual_confirm: &std::collections::BTreeSet<crate::label::PrivacyLabel>,
390    model_ids: &[String],
391) -> (Vec<Finding>, Vec<EngineAttribution>) {
392    use crate::label::PrivacyLabel;
393
394    let mut clusters: Vec<(Option<PrivacyLabel>, Vec<PerEngineFinding>)> = Vec::new();
395    for pf in per_engine {
396        let canonical = PrivacyLabel::from_kind(pf.finding.kind);
397        let target_idx = clusters.iter().position(|(existing_label, group)| {
398            *existing_label == canonical
399                && group
400                    .iter()
401                    .any(|g| iou(g.finding.span, pf.finding.span) >= IOU_THRESHOLD)
402        });
403        match target_idx {
404            Some(idx) => clusters[idx].1.push(pf),
405            None => clusters.push((canonical, vec![pf])),
406        }
407    }
408
409    // 临时 Vec 收集(label,longest_finding,distinct_engines);后按 span.start 排序
410    let mut staged: Vec<(Finding, std::collections::BTreeSet<usize>)> = Vec::new();
411    for (canonical, group) in clusters {
412        let distinct: std::collections::BTreeSet<usize> =
413            group.iter().map(|p| p.engine_idx).collect();
414
415        // dual_confirm 检查
416        if let Some(label) = canonical {
417            if dual_confirm.contains(&label) && distinct.len() < 2 {
418                continue;
419            }
420        }
421        // 选 longest span
422        if let Some(longest) = group
423            .into_iter()
424            .map(|p| p.finding)
425            .max_by_key(|f| f.span.1.saturating_sub(f.span.0))
426        {
427            staged.push((longest, distinct));
428        }
429    }
430
431    staged.sort_by_key(|(f, _)| f.span.0);
432
433    let mut findings = Vec::with_capacity(staged.len());
434    let mut attrs = Vec::with_capacity(staged.len());
435    for (idx, (finding, distinct)) in staged.into_iter().enumerate() {
436        let mut contributing_engines: Vec<String> = distinct
437            .into_iter()
438            .map(|engine_idx| {
439                model_ids
440                    .get(engine_idx)
441                    .cloned()
442                    .unwrap_or_else(|| format!("unknown-{engine_idx}"))
443            })
444            .collect();
445        // 按字符串字典序稳定输出(独立于 engine 注册顺序;Sprint 3 diagnose 工具消费稳)
446        contributing_engines.sort();
447        findings.push(finding);
448        attrs.push(EngineAttribution {
449            finding_index: idx,
450            contributing_engines,
451        });
452    }
453
454    (findings, attrs)
455}
456
457// ─────────────────────────── 单测(mock-engine 驱动)───────────────────────────
458
459#[cfg(test)]
460#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
461mod tests {
462    use super::*;
463    use crate::engine::{MockEngine, NoopEngine};
464
465    #[test]
466    fn ensemble_empty_engines_returns_empty() {
467        let ens = EnsembleEngine::new(vec![]);
468        let f = ens.infer("text").unwrap();
469        assert!(f.is_empty(), "0 engines 应返空 findings");
470        assert_eq!(ens.engine_count(), 0);
471    }
472
473    #[test]
474    fn ensemble_single_noop_returns_empty() {
475        let ens = EnsembleEngine::new(vec![Arc::new(NoopEngine)]);
476        let f = ens.infer("hello world").unwrap();
477        assert!(f.is_empty());
478        assert_eq!(ens.engine_count(), 1);
479    }
480
481    #[test]
482    fn ensemble_two_engines_disjoint_findings_both_kept() {
483        // engine_a 标 person (0,5);engine_b 标 email (10,30) — 无重叠
484        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
485            "person",
486            (0, 5),
487            0.9,
488            5,
489        )]));
490        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
491            "email",
492            (10, 30),
493            0.95,
494            10,
495        )]));
496        let ens = EnsembleEngine::new(vec![a, b]);
497        let f = ens.infer("anything").unwrap();
498        assert_eq!(f.len(), 2, "无重叠应都保留");
499        // 升序
500        assert_eq!(f[0].span, (0, 5));
501        assert_eq!(f[1].span, (10, 30));
502    }
503
504    #[test]
505    fn ensemble_same_kind_overlapping_picks_longer_span() {
506        // engine_a 给短 span (0, 5) "John";engine_b 给完整 span (0, 10) "John Smith"
507        // 同 kind person + IoU = 5/10 = 0.5 → 取 longer
508        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
509            "person",
510            (0, 5),
511            0.9,
512            5,
513        )]));
514        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
515            "person",
516            (0, 10),
517            0.85,
518            5,
519        )]));
520        let ens = EnsembleEngine::new(vec![a, b]);
521        let f = ens.infer("anything").unwrap();
522        assert_eq!(f.len(), 1, "同 kind IoU >= 0.5 应合并");
523        assert_eq!(f[0].span, (0, 10), "应取 longer span (10 > 5)");
524    }
525
526    #[test]
527    fn ensemble_same_kind_low_iou_both_kept() {
528        // person (0, 5) "John";person (20, 30) "Smith" — 不重叠(IoU=0)
529        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
530            "person",
531            (0, 5),
532            0.9,
533            5,
534        )]));
535        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
536            "person",
537            (20, 30),
538            0.9,
539            5,
540        )]));
541        let ens = EnsembleEngine::new(vec![a, b]);
542        let f = ens.infer("any").unwrap();
543        assert_eq!(f.len(), 2, "同 kind 不重叠应都保留");
544    }
545
546    #[test]
547    fn ensemble_different_kind_overlapping_both_kept() {
548        // 同 span (0, 10),但 kind 不同 — 都保留(由 caller 审计层决策)
549        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
550            "person",
551            (0, 10),
552            0.9,
553            5,
554        )]));
555        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
556            "email",
557            (0, 10),
558            0.9,
559            10,
560        )]));
561        let ens = EnsembleEngine::new(vec![a, b]);
562        let f = ens.infer("any").unwrap();
563        assert_eq!(f.len(), 2, "不同 kind 重叠不去重");
564    }
565
566    #[test]
567    fn ensemble_propagates_engine_error() {
568        struct FailingEngine;
569        impl RedactionEngine for FailingEngine {
570            fn infer(&self, _: &str) -> Result<Vec<Finding>, EngineError> {
571                Err(EngineError::InferRun("mock-failure".to_string()))
572            }
573        }
574        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
575            "person",
576            (0, 5),
577            0.9,
578            5,
579        )]));
580        let b = Arc::new(FailingEngine);
581        let ens = EnsembleEngine::new(vec![a, b]);
582        let r = ens.infer("any");
583        assert!(
584            matches!(r, Err(EngineError::InferRun(_))),
585            "任一引擎失败应 propagate(fail-closed)"
586        );
587    }
588
589    #[test]
590    fn ensemble_three_engines_iou_above_threshold_merges() {
591        // 三引擎场景:
592        // - "xlmr" 给 person (0, 6) — IoU(0,6)(0,10) = 6/10 = 0.6 ≥ 0.5
593        // - "yonigo" 没出 person(MockEngine 空)
594        // - 第三 mock(模拟"OpenAI")给 person (0, 10) 完整 span
595        // 期望:IoU 0.6 触发合并,取 longer span (0, 10)
596        let xlmr = Arc::new(MockEngine::from_findings(vec![Finding::model(
597            "person",
598            (0, 6),
599            0.85,
600            5,
601        )]));
602        let yonigo = Arc::new(MockEngine::from_findings(vec![]));
603        let openai = Arc::new(MockEngine::from_findings(vec![Finding::model(
604            "person",
605            (0, 10),
606            0.95,
607            5,
608        )]));
609        let ens = EnsembleEngine::new(vec![xlmr, yonigo, openai]);
610        let f = ens.infer("John Smith works here.").unwrap();
611        assert_eq!(f.len(), 1, "三 engine 同 kind IoU 0.6 应合 1");
612        assert_eq!(f[0].span, (0, 10), "应取 longer span (10 > 6)");
613    }
614
615    #[test]
616    fn ensemble_spike3_realistic_iou_below_threshold_keeps_both() {
617        // 这是 spike-3 实测真实场景:xlmr 给 (0, 4) "John",truth (0, 10);
618        // IoU = 4/10 = 0.4 < 0.5 阈值。openai 假设给 (0, 10);两 spans 都保留
619        // (体现 spike-3 person 0.67 recall 的真实算法行为 — IoU 不达,合并失败)
620        let xlmr = Arc::new(MockEngine::from_findings(vec![Finding::model(
621            "person",
622            (0, 4),
623            0.85,
624            5,
625        )]));
626        let openai = Arc::new(MockEngine::from_findings(vec![Finding::model(
627            "person",
628            (0, 10),
629            0.95,
630            5,
631        )]));
632        let ens = EnsembleEngine::new(vec![xlmr, openai]);
633        let f = ens.infer("John Smith.").unwrap();
634        assert_eq!(f.len(), 2, "IoU 0.4 < 0.5 不合并(spike-3 实测真实行为)");
635    }
636
637    #[test]
638    fn ensemble_iou_threshold_boundary_just_below_05_keeps_both() {
639        // (0, 4) ∩ (3, 10) = (3, 4) → 长度 1; ∪ = (0, 10) → 长度 10;IoU = 0.1
640        // < 0.5 阈值 → 都保留
641        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
642            "person",
643            (0, 4),
644            0.9,
645            5,
646        )]));
647        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
648            "person",
649            (3, 10),
650            0.9,
651            5,
652        )]));
653        let ens = EnsembleEngine::new(vec![a, b]);
654        let f = ens.infer("any").unwrap();
655        assert_eq!(f.len(), 2, "IoU < 0.5 不合并");
656    }
657
658    // ─── v0.7-α5 A step:cross-engine 双确认守门 ───
659    use crate::label::PrivacyLabel;
660
661    /// dual_confirm 关闭(默认)→ 与 R1h 行为一致(单 engine 报也保留)
662    #[test]
663    fn dual_confirm_default_off_keeps_single_engine_finding() {
664        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
665            "private_address",
666            (0, 10),
667            0.9,
668            5,
669        )]));
670        let b = Arc::new(MockEngine::from_findings(vec![]));
671        let ens = EnsembleEngine::new(vec![a, b]); // dual_confirm 默认空
672        let f = ens.infer("any").unwrap();
673        assert_eq!(f.len(), 1, "默认无 dual_confirm,单 engine 报应保留");
674    }
675
676    /// dual_confirm Address 启用 → 单 engine address 丢弃
677    #[test]
678    fn dual_confirm_address_drops_single_engine_finding() {
679        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
680            "private_address",
681            (0, 10),
682            0.9,
683            5,
684        )]));
685        let b = Arc::new(MockEngine::from_findings(vec![]));
686        let ens = EnsembleEngine::new(vec![a, b]).with_dual_confirm([PrivacyLabel::Address]);
687        let f = ens.infer("any").unwrap();
688        assert!(
689            f.is_empty(),
690            "dual_confirm Address 启用 + 仅 engine_a 报 → 丢弃,实际: {:?}",
691            f
692        );
693    }
694
695    /// dual_confirm Address 启用 + 双 engine 共识 → 保留 longest
696    #[test]
697    fn dual_confirm_address_keeps_dual_engine_consensus() {
698        // engine_a: address (0, 6) 短;engine_b: address (0, 10) 长
699        // IoU = 6/10 = 0.6 ≥ 0.5 → cluster;2 distinct engine → 保留 longest (0, 10)
700        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
701            "private_address",
702            (0, 6),
703            0.9,
704            5,
705        )]));
706        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
707            "private_address",
708            (0, 10),
709            0.9,
710            5,
711        )]));
712        let ens = EnsembleEngine::new(vec![a, b]).with_dual_confirm([PrivacyLabel::Address]);
713        let f = ens.infer("any").unwrap();
714        assert_eq!(f.len(), 1, "双 engine 共识应保留 1");
715        assert_eq!(f[0].span, (0, 10), "应取 longest span");
716    }
717
718    /// dual_confirm Address 启用,但 Person 不在集合 → Person 单 engine 仍保留
719    #[test]
720    fn dual_confirm_selective_keeps_other_labels() {
721        let a = Arc::new(MockEngine::from_findings(vec![
722            Finding::model("private_person", (0, 10), 0.9, 5),
723            Finding::model("private_address", (20, 30), 0.9, 5),
724        ]));
725        let b = Arc::new(MockEngine::from_findings(vec![])); // 空
726        let ens = EnsembleEngine::new(vec![a, b]).with_dual_confirm([PrivacyLabel::Address]);
727        let f = ens.infer("any").unwrap();
728        // Person 不在 dual_confirm:保留;Address 在 dual_confirm + 单 engine:丢
729        assert_eq!(f.len(), 1);
730        assert_eq!(f[0].kind, "private_person");
731    }
732
733    /// dual_confirm 多 label 集合
734    #[test]
735    fn dual_confirm_multi_labels() {
736        let a = Arc::new(MockEngine::from_findings(vec![
737            Finding::model("private_address", (0, 10), 0.9, 5),
738            Finding::model("private_date", (20, 30), 0.9, 5),
739            Finding::model("private_email", (40, 50), 0.9, 5),
740        ]));
741        let b = Arc::new(MockEngine::from_findings(vec![])); // 全单 engine
742        let ens = EnsembleEngine::new(vec![a, b])
743            .with_dual_confirm([PrivacyLabel::Address, PrivacyLabel::Date]);
744        let f = ens.infer("any").unwrap();
745        // Address + Date 单 engine 丢;Email 不在集合保留
746        assert_eq!(f.len(), 1);
747        assert_eq!(f[0].kind, "private_email");
748    }
749
750    /// 不同 engine 不同 span(IoU < 0.5)→ 分独立 cluster,各自检查 dual_confirm
751    #[test]
752    fn dual_confirm_separate_clusters_each_checked() {
753        // address (0, 5) by engine_a;address (20, 30) by engine_b — 不重叠
754        // 两个独立 cluster,各 1 distinct engine → dual_confirm Address 各自丢
755        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
756            "private_address",
757            (0, 5),
758            0.9,
759            5,
760        )]));
761        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
762            "private_address",
763            (20, 30),
764            0.9,
765            5,
766        )]));
767        let ens = EnsembleEngine::new(vec![a, b]).with_dual_confirm([PrivacyLabel::Address]);
768        let f = ens.infer("any").unwrap();
769        assert!(
770            f.is_empty(),
771            "两个独立 cluster 各 1 engine → 都丢(dual_confirm 不跨 cluster 共识)"
772        );
773    }
774
775    // ─── v0.8 Sprint 3 P2.0:per-engine attribution 守门 ───
776
777    #[test]
778    fn attribution_default_uses_unknown_idx() {
779        // 不调 with_model_ids → contributing_engines 应为 ["unknown-0"](单 engine cluster)
780        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
781            "person",
782            (0, 10),
783            0.9,
784            5,
785        )]));
786        let ens = EnsembleEngine::new(vec![a]);
787        let (findings, attrs) = ens.infer_with_attribution("any").unwrap();
788        assert_eq!(findings.len(), 1);
789        assert_eq!(attrs.len(), 1);
790        assert_eq!(attrs[0].finding_index, 0);
791        assert_eq!(attrs[0].contributing_engines, vec!["unknown-0".to_string()]);
792    }
793
794    #[test]
795    fn attribution_with_model_ids_returns_real_names() {
796        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
797            "person",
798            (0, 10),
799            0.9,
800            5,
801        )]));
802        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
803            "email",
804            (20, 30),
805            0.9,
806            5,
807        )]));
808        let ens = EnsembleEngine::new(vec![a, b])
809            .with_model_ids(["openai-privacy-filter-v1", "xlmr-pii-v1"]);
810        let (findings, attrs) = ens.infer_with_attribution("any").unwrap();
811        assert_eq!(findings.len(), 2);
812        assert_eq!(attrs.len(), 2);
813        // 按 span.start 升序;person (0,10) 来自 engine 0,email (20,30) 来自 engine 1
814        assert_eq!(
815            attrs[0].contributing_engines,
816            vec!["openai-privacy-filter-v1".to_string()]
817        );
818        assert_eq!(
819            attrs[1].contributing_engines,
820            vec!["xlmr-pii-v1".to_string()]
821        );
822    }
823
824    #[test]
825    fn attribution_consensus_lists_all_contributing_engines() {
826        // 两 engine 同 IoU cluster(person (0, 6) + person (0, 10),IoU=0.6)
827        // → 1 finding(longest)+ contributing_engines 含两个 model_id
828        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
829            "person",
830            (0, 6),
831            0.85,
832            5,
833        )]));
834        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
835            "person",
836            (0, 10),
837            0.95,
838            5,
839        )]));
840        let ens = EnsembleEngine::new(vec![a, b])
841            .with_model_ids(["xlmr-pii-v1", "openai-privacy-filter-v1"]);
842        let (findings, attrs) = ens.infer_with_attribution("any").unwrap();
843        assert_eq!(findings.len(), 1, "IoU 0.6 应合 1");
844        assert_eq!(findings[0].span, (0, 10));
845        assert_eq!(attrs.len(), 1);
846        // distinct + 字典序排序:openai-... 在 xlmr-... 前
847        assert_eq!(
848            attrs[0].contributing_engines,
849            vec![
850                "openai-privacy-filter-v1".to_string(),
851                "xlmr-pii-v1".to_string()
852            ]
853        );
854    }
855
856    #[test]
857    #[should_panic(expected = "with_model_ids")]
858    fn attribution_with_mismatched_ids_count_panics() {
859        let a = Arc::new(MockEngine::from_findings(vec![]));
860        let b = Arc::new(MockEngine::from_findings(vec![]));
861        // 2 engines + 1 id → fail-fast panic
862        let _ens = EnsembleEngine::new(vec![a, b]).with_model_ids(["only-one"]);
863    }
864
865    #[test]
866    fn attribution_dual_confirm_drops_single_engine_consistent() {
867        // dual_confirm Address 启用 + 单 engine 报 → finding 丢 + attribution 也不应出现
868        let a = Arc::new(MockEngine::from_findings(vec![Finding::model(
869            "private_address",
870            (0, 10),
871            0.9,
872            5,
873        )]));
874        let b = Arc::new(MockEngine::from_findings(vec![]));
875        let ens = EnsembleEngine::new(vec![a, b])
876            .with_model_ids(["openai", "xlmr"])
877            .with_dual_confirm([PrivacyLabel::Address]);
878        let (findings, attrs) = ens.infer_with_attribution("any").unwrap();
879        assert!(findings.is_empty());
880        assert!(
881            attrs.is_empty(),
882            "dual_confirm 丢 finding 时 attribution 也应丢"
883        );
884    }
885
886    #[test]
887    fn attribution_finding_index_aligns_with_findings_array() {
888        // 多 finding 场景验 finding_index 对应 findings 数组下标
889        let a = Arc::new(MockEngine::from_findings(vec![
890            Finding::model("address", (50, 100), 0.9, 5),
891            Finding::model("person", (0, 10), 0.9, 5),
892        ]));
893        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
894            "email",
895            (20, 40),
896            0.9,
897            10,
898        )]));
899        let ens = EnsembleEngine::new(vec![a, b]).with_model_ids(["e0", "e1"]);
900        let (findings, attrs) = ens.infer_with_attribution("any").unwrap();
901        assert_eq!(findings.len(), 3);
902        assert_eq!(attrs.len(), 3);
903        for (i, a) in attrs.iter().enumerate() {
904            assert_eq!(
905                a.finding_index, i,
906                "finding_index 必须与 findings 数组下标对齐"
907            );
908        }
909    }
910
911    // ─── v0.9 Sprint 1 P1.3 fix — EnsembleEngine.infer_with_lang 透传 lang 守门 ───
912
913    /// 测试 mock:override `infer_with_lang` 捕获 lang;验证 ensemble 真透传到
914    /// 每个子 engine,而非走 trait default 委托 self.infer(那是导致 v0.9 P1.3
915    /// 远程实测 lang_aware 数据 == baseline 的关键 bug 根因)。
916    struct LangCapturingTestEngine {
917        captured: std::sync::Mutex<Vec<Option<String>>>,
918    }
919
920    impl LangCapturingTestEngine {
921        fn new() -> Self {
922            Self {
923                captured: std::sync::Mutex::new(Vec::new()),
924            }
925        }
926
927        fn captured(&self) -> Vec<Option<String>> {
928            self.captured.lock().unwrap().clone()
929        }
930    }
931
932    impl RedactionEngine for LangCapturingTestEngine {
933        fn infer(&self, _text: &str) -> Result<Vec<Finding>, EngineError> {
934            // legacy 路径不该被 ensemble.infer_with_lang(text, Some(_)) 触发
935            // (因为 ensemble 应 override 透传 lang_with_lang 而非走 default)
936            self.captured.lock().unwrap().push(None);
937            Ok(Vec::new())
938        }
939
940        fn infer_with_lang(
941            &self,
942            text: &str,
943            lang: Option<&str>,
944        ) -> Result<Vec<Finding>, EngineError> {
945            if lang.is_none() {
946                return self.infer(text);
947            }
948            self.captured.lock().unwrap().push(lang.map(String::from));
949            Ok(Vec::new())
950        }
951    }
952
953    /// **关键回归守门**:ensemble.infer_with_lang(text, Some("de")) 必须把
954    /// "de" 透传到每个子 engine 的 infer_with_lang(),而不是走 trait default
955    /// 委托 self.infer(那是导致 P1.3 实测无效的 bug)。
956    #[test]
957    fn ensemble_infer_with_lang_propagates_lang_to_all_sub_engines() {
958        let a = Arc::new(LangCapturingTestEngine::new());
959        let b = Arc::new(LangCapturingTestEngine::new());
960        let ens = EnsembleEngine::new(vec![a.clone(), b.clone()]);
961
962        let _ = ens.infer_with_lang("any text", Some("de")).unwrap();
963
964        // 两个子 engine 都应捕获 Some("de"),而非 None(走 default 委托 infer)
965        assert_eq!(
966            a.captured(),
967            vec![Some("de".to_string())],
968            "engine a 应收到透传的 lang Some(\"de\");若是 None 表明 ensemble 走 default 委托 infer (bug 根因)"
969        );
970        assert_eq!(
971            b.captured(),
972            vec![Some("de".to_string())],
973            "engine b 应收到透传的 lang Some(\"de\")"
974        );
975    }
976
977    /// ensemble.infer(text) legacy 路径委托 infer_with_lang(text, None) — 子 engine
978    /// 接到 None 走 default 兼容;v0.8 行为不变。
979    #[test]
980    fn ensemble_infer_legacy_passes_none_lang() {
981        let a = Arc::new(LangCapturingTestEngine::new());
982        let ens = EnsembleEngine::new(vec![a.clone()]);
983        let _ = ens.infer("any").unwrap();
984        assert_eq!(
985            a.captured(),
986            vec![None],
987            "legacy ensemble.infer 应让子 engine 走 lang None(等价 v0.8)"
988        );
989    }
990
991    // ─── v0.10 Sprint 3 — infer_with_attribution_with_lang 守门(P1.3 R1 NICE 兑付)───
992
993    /// 关键回归守门:`infer_with_attribution_with_lang(text, Some("de"))` 必须
994    /// 把 "de" 透传到每个子 engine 的 `infer_with_lang()`(P1.3 R1 NICE 修复 —
995    /// 不能 silent fallback baseline `engine.infer(text)`)。
996    #[test]
997    fn ensemble_infer_with_attribution_with_lang_propagates_lang() {
998        let a = Arc::new(LangCapturingTestEngine::new());
999        let b = Arc::new(LangCapturingTestEngine::new());
1000        let ens = EnsembleEngine::new(vec![a.clone(), b.clone()]).with_model_ids(["e0", "e1"]);
1001
1002        let _ = ens
1003            .infer_with_attribution_with_lang("any", Some("de"))
1004            .unwrap();
1005        assert_eq!(
1006            a.captured(),
1007            vec![Some("de".to_string())],
1008            "engine a 必须收到 lang Some(\"de\")(P1.3 R1 NICE attribution lang 透传)"
1009        );
1010        assert_eq!(b.captured(), vec![Some("de".to_string())]);
1011    }
1012
1013    /// `infer_with_attribution(text)` legacy 路径委托
1014    /// `infer_with_attribution_with_lang(text, None)` — v0.9 行为不变。
1015    #[test]
1016    fn ensemble_infer_with_attribution_legacy_passes_none_lang() {
1017        let a = Arc::new(LangCapturingTestEngine::new());
1018        let ens = EnsembleEngine::new(vec![a.clone()]).with_model_ids(["e0"]);
1019        let _ = ens.infer_with_attribution("any").unwrap();
1020        assert_eq!(
1021            a.captured(),
1022            vec![None],
1023            "legacy infer_with_attribution 应走 lang None(等价 v0.9 baseline)"
1024        );
1025    }
1026
1027    #[test]
1028    fn ensemble_output_sorted_by_span_start() {
1029        // 故意倒序加 findings,验证输出按 span.start 升序
1030        let a = Arc::new(MockEngine::from_findings(vec![
1031            Finding::model("address", (50, 100), 0.9, 5),
1032            Finding::model("person", (0, 10), 0.9, 5),
1033        ]));
1034        let b = Arc::new(MockEngine::from_findings(vec![Finding::model(
1035            "email",
1036            (20, 40),
1037            0.9,
1038            10,
1039        )]));
1040        let ens = EnsembleEngine::new(vec![a, b]);
1041        let f = ens.infer("any").unwrap();
1042        assert_eq!(f.len(), 3);
1043        assert_eq!(f[0].span, (0, 10));
1044        assert_eq!(f[1].span, (20, 40));
1045        assert_eq!(f[2].span, (50, 100));
1046    }
1047}
vigil_redaction/ensemble.rs

vigil_redaction/
ensemble.rs