#![allow(missing_docs)]
/// Identifies which detection path produced a [`Finding`].
///
/// The variant is set by the constructor used: [`Finding::hard`] tags its
/// result with [`FindingSource::Hard`], [`Finding::model`] with
/// [`FindingSource::Model`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FindingSource {
    /// Finding created through [`Finding::hard`]; such findings always carry
    /// a confidence of `1.0` and take precedence in [`merge_findings`].
    Hard,
    /// Finding created through [`Finding::model`]; carries a caller-supplied
    /// confidence and is dropped by [`merge_findings`] when it overlaps a
    /// hard finding.
    Model,
}
/// A single detection result: what was found, by which path, and where.
///
/// Equality is field-by-field; because `confidence` is an `f32` the type is
/// `PartialEq` only, not `Eq`.
#[derive(Clone, Debug, PartialEq)]
pub struct Finding {
    /// Static label naming what was detected (for example `"email"`).
    pub kind: &'static str,
    /// Detection path that produced this finding.
    pub source: FindingSource,
    /// `(start, end)` position pair. The overlap check used when merging
    /// treats `end` as exclusive: spans that only share a boundary do not
    /// overlap. NOTE(review): whether offsets are bytes or chars is not
    /// visible here — confirm with the producers.
    pub span: (usize, usize),
    /// Confidence attached to the finding. Hard findings are created with
    /// `1.0`; model findings carry whatever the caller passes (no range is
    /// enforced by this type).
    pub confidence: f32,
    /// Risk contribution of this finding; presumably summed by callers into
    /// an overall risk score — confirm against the consumer.
    pub risk_delta: u32,
}
impl Finding {
pub fn hard(kind: &'static str, span: (usize, usize), risk_delta: u32) -> Self {
Self {
kind,
source: FindingSource::Hard,
span,
confidence: 1.0,
risk_delta,
}
}
pub fn model(
kind: &'static str,
span: (usize, usize),
confidence: f32,
risk_delta: u32,
) -> Self {
Self {
kind,
source: FindingSource::Model,
span,
confidence,
risk_delta,
}
}
}
/// Returns `true` when each span starts strictly before the other one ends.
///
/// Spans are `(start, end)` pairs. Because both comparisons are strict, two
/// spans that merely touch — one ending exactly where the other begins — are
/// reported as not overlapping.
#[inline]
fn spans_overlap(a: (usize, usize), b: (usize, usize)) -> bool {
    let (a_start, a_end) = a;
    let (b_start, b_end) = b;
    let a_starts_before_b_ends = a_start < b_end;
    let b_starts_before_a_ends = b_start < a_end;
    a_starts_before_b_ends && b_starts_before_a_ends
}
/// Combines hard and model findings into one list ordered by span start.
///
/// Every hard finding is kept. A model finding is kept only if its span
/// overlaps none of the hard findings (see [`spans_overlap`]; touching spans
/// do not count as overlapping). Model findings are not de-duplicated against
/// each other.
///
/// The result is sorted by `span.0` with a stable sort, so findings that share
/// a start keep their pre-sort order: hard findings first, then surviving
/// model findings, each group in input order. Neither input slice is modified;
/// the returned findings are clones.
pub fn merge_findings(hard: &[Finding], model: &[Finding]) -> Vec<Finding> {
    // A model finding is shadowed as soon as one hard span intersects it.
    let shadowed_by_hard = |candidate: &Finding| {
        hard.iter()
            .any(|rule_hit| spans_overlap(rule_hit.span, candidate.span))
    };
    let surviving_model = model
        .iter()
        .filter(|&candidate| !shadowed_by_hard(candidate));

    let mut merged: Vec<Finding> = Vec::with_capacity(hard.len() + model.len());
    merged.extend(hard.iter().chain(surviving_model).cloned());
    // Stable sort: equal starts keep the hard-before-model insertion order.
    merged.sort_by_key(|finding| finding.span.0);
    merged
}
// Unit tests for `merge_findings`, plus golden checks that tie the hard kind
// names used here to `crate::PrivacyLabel` and `crate::HARD_RULES`.
#[cfg(test)]
mod tests {
use super::*;
// Shorthand: a Hard finding with span `(start, end)` and the given risk delta.
fn h(kind: &'static str, start: usize, end: usize, risk: u32) -> Finding {
Finding::hard(kind, (start, end), risk)
}
// Shorthand: a Model finding with span `(start, end)`, confidence and risk delta.
fn m(kind: &'static str, start: usize, end: usize, conf: f32, risk: u32) -> Finding {
Finding::model(kind, (start, end), conf, risk)
}
// Two empty inputs merge to an empty result.
#[test]
fn merge_empty_both() {
assert_eq!(merge_findings(&[], &[]), vec![]);
}
// With no model findings, the (already start-ordered) hard findings come back unchanged.
// Assertion message: hard findings should be kept in ascending span.start order.
#[test]
fn merge_hard_only() {
let hard = vec![h("email", 10, 30, 10), h("aws_access_key_id", 50, 70, 25)];
let merged = merge_findings(&hard, &[]);
assert_eq!(merged, hard, "Hard findings 应按 span.start 升序保留");
}
// With no hard findings, the (already start-ordered) model findings come back unchanged.
#[test]
fn merge_model_only() {
let model = vec![
m("private_person", 0, 13, 0.99, 5),
m("private_date", 20, 30, 0.98, 5),
];
let merged = merge_findings(&[], &model);
assert_eq!(merged, model);
}
// Disjoint hard and model findings are all kept and interleaved by span start.
// Assertion message: all 3 non-overlapping findings should be kept.
#[test]
fn merge_non_overlapping_both_kept() {
let hard = vec![h("email", 73, 109, 10)];
let model = vec![
m("private_person", 0, 13, 0.99, 5),
m("private_date", 26, 36, 0.98, 5),
];
let merged = merge_findings(&hard, &model);
assert_eq!(merged.len(), 3, "3 条不重叠 finding 应全保留");
assert_eq!(merged[0].kind, "private_person");
assert_eq!(merged[1].kind, "private_date");
assert_eq!(merged[2].kind, "email");
}
// A model finding with exactly the same span as a hard finding is dropped.
// Assertion message: on overlap only the Hard finding should remain.
#[test]
fn merge_fully_overlapping_hard_wins() {
let hard = vec![h("email", 73, 109, 10)];
let model = vec![m("private_email", 73, 109, 1.0, 10)];
let merged = merge_findings(&hard, &model);
assert_eq!(merged.len(), 1, "重叠应只留 Hard");
assert_eq!(merged[0].kind, "email");
assert_eq!(merged[0].source, FindingSource::Hard);
}
// Partial overlaps also drop the model finding. Three shapes are covered:
// overlapping the hard span's left edge, its right edge, and fully containing it.
#[test]
fn merge_partially_overlapping_hard_wins() {
let hard = vec![h("email", 73, 109, 10)];
// Model span crosses the hard span's start.
let model = vec![m("private_email", 70, 85, 0.9, 10)];
let merged = merge_findings(&hard, &model);
assert_eq!(merged.len(), 1);
assert_eq!(merged[0].source, FindingSource::Hard);
// Model span crosses the hard span's end.
let model2 = vec![m("private_email", 100, 120, 0.9, 10)];
let merged2 = merge_findings(&hard, &model2);
assert_eq!(merged2.len(), 1);
assert_eq!(merged2[0].source, FindingSource::Hard);
// Model span encloses the hard span entirely.
let model3 = vec![m("private_email", 70, 120, 0.9, 10)];
let merged3 = merge_findings(&hard, &model3);
assert_eq!(merged3.len(), 1);
assert_eq!(merged3[0].source, FindingSource::Hard);
}
// Spans that only touch (hard ends at 20, model starts at 20) are not an overlap.
// Assertion message: adjacent spans are both kept (spans_overlap uses strict less-than).
#[test]
fn merge_adjacent_not_overlap() {
let hard = vec![h("email", 10, 20, 10)];
let model = vec![m("private_person", 20, 30, 0.9, 5)];
let merged = merge_findings(&hard, &model);
assert_eq!(
merged.len(),
2,
"相邻 span 两者都保留(spans_overlap 严格 strict-less)"
);
assert_eq!(merged[0].kind, "email");
assert_eq!(merged[1].kind, "private_person");
}
// Summing `risk_delta` over the merged list counts an overlapped region once
// (hard only), while disjoint hard + model findings both contribute.
#[test]
fn merge_no_double_weighting_on_overlap() {
let hard = vec![h("email", 73, 109, 10)];
let model = vec![m("private_email", 73, 109, 1.0, 10)];
let merged = merge_findings(&hard, &model);
let total: u32 = merged.iter().map(|f| f.risk_delta).sum();
// Assertion message: on overlap risk counts Hard once, not Hard+Model = 20.
assert_eq!(
total, 10,
"重叠时 risk 只计 Hard 一次,不应 Hard+Model 双加为 20"
);
let model2 = vec![m("private_email", 200, 220, 1.0, 10)];
let merged2 = merge_findings(&hard, &model2);
let total2: u32 = merged2.iter().map(|f| f.risk_delta).sum();
// Assertion message: without overlap Hard + Model accumulate normally.
assert_eq!(total2, 20, "非重叠时 Hard + Model 正常累加");
}
// Scenario with one hard email finding and six model findings, one of which
// (`private_email`) has the same span as the hard finding and must be dropped.
#[test]
fn merge_iss_022_medium_sample_scenario() {
let hard = vec![h("email", 73, 109, 10)];
let model = vec![
m("private_person", 0, 13, 0.99, 5),
m("private_date", 26, 36, 0.98, 5),
m("private_person", 45, 70, 0.97, 5),
m("private_email", 73, 109, 1.0, 10),
m("private_phone", 117, 135, 1.0, 10),
m("private_address", 157, 201, 0.99, 5),
];
let merged = merge_findings(&hard, &model);
// Assertion message: 6 findings after merge (1 Hard + 5 Model, private_email dropped).
assert_eq!(
merged.len(),
6,
"合并后 6 条(Hard 1 + Model 5,private_email drop)"
);
assert!(!merged.iter().any(|f| f.kind == "private_email"));
assert!(merged
.iter()
.any(|f| f.kind == "email" && f.source == FindingSource::Hard));
// Output must be ordered by span start.
let starts: Vec<usize> = merged.iter().map(|f| f.span.0).collect();
assert_eq!(starts, vec![0, 26, 45, 73, 117, 157]);
// 10 (hard email) + 5 + 5 + 5 + 10 + 5 (surviving model findings) = 40.
let total: u32 = merged.iter().map(|f| f.risk_delta).sum();
assert_eq!(total, 40);
}
// Both input vectors compare equal to their pre-call clones after merging.
#[test]
fn merge_does_not_mutate_inputs() {
let hard = vec![h("email", 10, 20, 10)];
let model = vec![m("private_email", 10, 20, 1.0, 10)];
let hard_before = hard.clone();
let model_before = model.clone();
let _ = merge_findings(&hard, &model);
assert_eq!(hard, hard_before);
assert_eq!(model, model_before);
}
// Golden table: each hard kind name paired with the `crate::PrivacyLabel` that
// `PrivacyLabel::from_kind` is expected to return for it. 14 entries; the size
// is pinned by `iss_021_hard_kind_set_size_matches_redaction_rules`.
const HARD_KIND_TO_LABEL: &[(&str, crate::PrivacyLabel)] = &[
("aws_access_key_id", crate::PrivacyLabel::Secret),
("github_token", crate::PrivacyLabel::Secret),
("anthropic_api_key", crate::PrivacyLabel::Secret),
("openai_api_key", crate::PrivacyLabel::Secret),
("jwt", crate::PrivacyLabel::Secret),
("pem_private_key", crate::PrivacyLabel::Secret),
("env_assignment", crate::PrivacyLabel::Secret),
("slack_webhook", crate::PrivacyLabel::Secret),
("stripe_secret_key", crate::PrivacyLabel::Secret),
("google_api_key", crate::PrivacyLabel::Secret),
("gitlab_pat", crate::PrivacyLabel::Secret),
("database_url", crate::PrivacyLabel::Secret),
("email", crate::PrivacyLabel::Email),
("internal_ipv4", crate::PrivacyLabel::Url),
];
// Picks the model-side kind name these tests pair with a given hard kind:
// "email" -> "private_email", "internal_ipv4" -> "private_url", anything else -> "secret".
fn paired_model_kind(hard_kind: &str) -> &'static str {
match hard_kind {
"email" => "private_email",
"internal_ipv4" => "private_url",
_ => "secret",
}
}
// Every kind in the golden table must map to its expected label via
// `PrivacyLabel::from_kind`. The failure message (kept verbatim) says the
// mapping is closed per ADR 0013 and that changing a literal requires updating
// `from_kind` and this table together.
#[test]
fn iss_021_hard_kind_to_privacy_label_golden() {
use crate::PrivacyLabel;
for (kind, expected) in HARD_KIND_TO_LABEL {
assert_eq!(
PrivacyLabel::from_kind(kind),
Some(*expected),
"Hard kind {kind:?} 应映射到 {expected:?}\
(ADR 0013 Revised D-final-2 封闭映射;改字面量需同步 \
vigil-redaction::label.rs::from_kind + 本 golden 表)"
);
}
}
// For each hard kind: a model finding on the identical span is dropped, the
// surviving finding is the hard one, and its risk delta (25) is counted once.
#[test]
fn iss_021_merge_overlap_hard_wins_for_each_kind() {
for (kind, _) in HARD_KIND_TO_LABEL {
let hard = vec![Finding::hard(kind, (10, 30), 25)];
let model = vec![Finding::model(paired_model_kind(kind), (10, 30), 1.0, 25)];
let merged = merge_findings(&hard, &model);
// Assertion message: same-span overlap must de-duplicate to a single finding.
assert_eq!(
merged.len(),
1,
"Hard kind {kind:?} 同 span 重叠 merge 必去重为 1 条"
);
// Assertion message: on same-span overlap Hard should win.
assert_eq!(
merged[0].source,
FindingSource::Hard,
"Hard kind {kind:?} 同 span 重叠应 Hard 赢(ADR 0013 D-final-1)"
);
assert_eq!(merged[0].kind, *kind);
// Assertion message: on overlap risk counts Hard once, not Hard+Model.
assert_eq!(
merged[0].risk_delta, 25,
"Hard kind {kind:?} 重叠时 risk 只计 Hard 一次,不应 Hard+Model 双加"
);
}
}
// For each hard kind: a model finding on a disjoint span is kept alongside the
// hard one, with the hard finding (earlier start) first.
#[test]
fn iss_021_merge_no_overlap_both_kept_for_each_kind() {
for (kind, _) in HARD_KIND_TO_LABEL {
let hard = vec![Finding::hard(kind, (10, 30), 25)];
let model = vec![Finding::model(paired_model_kind(kind), (50, 70), 1.0, 25)];
let merged = merge_findings(&hard, &model);
// Assertion message: non-overlapping merge keeps both findings.
assert_eq!(
merged.len(),
2,
"Hard kind {kind:?} 非重叠 merge 两者都保留(ADR 0013 D5)"
);
assert_eq!(merged[0].source, FindingSource::Hard);
assert_eq!(merged[1].source, FindingSource::Model);
}
}
// Drift guard: the set of kinds in the golden table must equal the rule names
// in `crate::HARD_RULES` plus "email" and "internal_ipv4", and contain exactly
// 14 entries. NOTE(review): the failure message implies those two kinds live
// only in ALL_RULES rather than HARD_RULES — confirm against the rule tables.
#[test]
fn iss_021_hard_kind_set_size_matches_redaction_rules() {
use std::collections::BTreeSet;
let golden_kinds: BTreeSet<&str> = HARD_KIND_TO_LABEL.iter().map(|(k, _)| *k).collect();
let mut expected_kinds: BTreeSet<&'static str> =
crate::HARD_RULES.iter().map(|r| r.name).collect();
expected_kinds.insert("email");
expected_kinds.insert("internal_ipv4");
assert_eq!(
golden_kinds, expected_kinds,
"HARD_KIND_TO_LABEL 与 (HARD_RULES + email/internal_ipv4) 集合漂移;\
检查 vigil-redaction lib.rs HARD_RULES 是否新增 / 删除了 hard rule,\
以及 ALL_RULES 是否还独有 email/internal_ipv4(若改动需同步本表 + \
ADR 0013 Revised 版本史)"
);
assert_eq!(golden_kinds.len(), 14);
}
}