#![allow(missing_docs)]

/// Records which detector produced a [`Finding`].
///
/// The variant is set by the constructor used: [`Finding::hard`] tags the
/// finding as [`FindingSource::Hard`], [`Finding::model`] tags it as
/// [`FindingSource::Model`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FindingSource {
    /// Set by [`Finding::hard`]; such findings always carry confidence `1.0`.
    // NOTE(review): presumably a deterministic rule match (the tests refer to
    // `HARD_RULES`) — confirm against the rule engine.
    Hard,
    /// Set by [`Finding::model`]; the confidence is whatever the caller supplied.
    // NOTE(review): presumably the output of a statistical/ML detector — confirm.
    Model,
}
28
/// A single detection result, produced either by [`Finding::hard`] or
/// [`Finding::model`].
///
/// Only `PartialEq` (not `Eq`) is derived because `confidence` is an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub struct Finding {
    /// Static label naming what was detected (the tests use values such as
    /// `"email"` or `"private_person"`).
    pub kind: &'static str,
    /// Which constructor / detector produced this finding.
    pub source: FindingSource,
    /// `(start, end)` position pair. `merge_findings` orders its output by the
    /// first element and treats spans that merely touch as non-overlapping.
    // NOTE(review): presumably offsets into the scanned text with an exclusive
    // end — confirm the unit (bytes vs. chars) with the producers.
    pub span: (usize, usize),
    /// Detector confidence. [`Finding::hard`] always stores `1.0`;
    /// [`Finding::model`] stores the caller-supplied value unchanged.
    pub confidence: f32,
    /// Risk contribution attached to this finding.
    // NOTE(review): presumably summed by a downstream scorer (the tests sum it
    // over a merged list) — confirm.
    pub risk_delta: u32,
}
44
45impl Finding {
46 pub fn hard(kind: &'static str, span: (usize, usize), risk_delta: u32) -> Self {
48 Self {
49 kind,
50 source: FindingSource::Hard,
51 span,
52 confidence: 1.0,
53 risk_delta,
54 }
55 }
56
57 pub fn model(
59 kind: &'static str,
60 span: (usize, usize),
61 confidence: f32,
62 risk_delta: u32,
63 ) -> Self {
64 Self {
65 kind,
66 source: FindingSource::Model,
67 span,
68 confidence,
69 risk_delta,
70 }
71 }
72}
73
/// Returns `true` when the two `(start, end)` spans share at least one position.
///
/// The comparison is strict: spans that only touch (one span's end equals the
/// other's start) are reported as non-overlapping.
#[inline]
fn spans_overlap(a: (usize, usize), b: (usize, usize)) -> bool {
    let (a_start, a_end) = a;
    let (b_start, b_end) = b;
    // The spans are disjoint exactly when one begins at or after the other's end.
    let disjoint = a_start >= b_end || b_start >= a_end;
    !disjoint
}
80
81pub fn merge_findings(hard: &[Finding], model: &[Finding]) -> Vec<Finding> {
93 let mut out: Vec<Finding> = Vec::with_capacity(hard.len() + model.len());
94 out.extend(hard.iter().cloned());
96 for m in model {
98 let overlapped = hard.iter().any(|h| spans_overlap(h.span, m.span));
99 if !overlapped {
100 out.push(m.clone());
101 }
102 }
103 out.sort_by_key(|f| f.span.0);
105 out
106}
107
#[cfg(test)]
mod tests {
    use super::*;
    use crate::PrivacyLabel;
    use std::collections::BTreeSet;

    /// Shorthand for a hard finding with span `(start, end)`.
    fn h(kind: &'static str, start: usize, end: usize, risk: u32) -> Finding {
        let span = (start, end);
        Finding::hard(kind, span, risk)
    }

    /// Shorthand for a model finding with span `(start, end)`.
    fn m(kind: &'static str, start: usize, end: usize, conf: f32, risk: u32) -> Finding {
        let span = (start, end);
        Finding::model(kind, span, conf, risk)
    }

    /// Sums `risk_delta` over a list of findings.
    fn total_risk(findings: &[Finding]) -> u32 {
        findings.iter().map(|f| f.risk_delta).sum()
    }

    /// Two empty inputs merge to an empty output.
    #[test]
    fn merge_empty_both() {
        let merged = merge_findings(&[], &[]);
        assert_eq!(merged, vec![]);
    }

    /// With no model findings, the (already ordered) hard findings come back unchanged.
    #[test]
    fn merge_hard_only() {
        let hard = [h("email", 10, 30, 10), h("aws_access_key_id", 50, 70, 25)];
        let merged = merge_findings(&hard, &[]);
        assert_eq!(merged, hard, "Hard findings 应按 span.start 升序保留");
    }

    /// With no hard findings, the (already ordered) model findings come back unchanged.
    #[test]
    fn merge_model_only() {
        let model = [
            m("private_person", 0, 13, 0.99, 5),
            m("private_date", 20, 30, 0.98, 5),
        ];
        let merged = merge_findings(&[], &model);
        assert_eq!(merged, model);
    }

    /// Disjoint hard and model findings are all kept, ordered by span start.
    #[test]
    fn merge_non_overlapping_both_kept() {
        let hard = [h("email", 73, 109, 10)];
        let model = [
            m("private_person", 0, 13, 0.99, 5),
            m("private_date", 26, 36, 0.98, 5),
        ];
        let merged = merge_findings(&hard, &model);
        assert_eq!(merged.len(), 3, "3 条不重叠 finding 应全保留");
        let kinds: Vec<&str> = merged.iter().map(|f| f.kind).collect();
        assert_eq!(kinds, ["private_person", "private_date", "email"]);
    }

    /// A model finding on exactly the same span as a hard finding is dropped.
    #[test]
    fn merge_fully_overlapping_hard_wins() {
        let hard = [h("email", 73, 109, 10)];
        let model = [m("private_email", 73, 109, 1.0, 10)];
        let merged = merge_findings(&hard, &model);
        assert_eq!(merged.len(), 1, "重叠应只留 Hard");
        let survivor = &merged[0];
        assert_eq!(survivor.kind, "email");
        assert_eq!(survivor.source, FindingSource::Hard);
    }

    /// Left-partial, right-partial and enclosing overlaps all lose to the hard finding.
    #[test]
    fn merge_partially_overlapping_hard_wins() {
        let hard = [h("email", 73, 109, 10)];
        for (start, end) in [(70, 85), (100, 120), (70, 120)] {
            let model = [m("private_email", start, end, 0.9, 10)];
            let merged = merge_findings(&hard, &model);
            assert_eq!(merged.len(), 1);
            assert_eq!(merged[0].source, FindingSource::Hard);
        }
    }

    /// Spans that only touch (end == start) are not an overlap; both findings stay.
    #[test]
    fn merge_adjacent_not_overlap() {
        let hard = [h("email", 10, 20, 10)];
        let model = [m("private_person", 20, 30, 0.9, 5)];
        let merged = merge_findings(&hard, &model);
        assert_eq!(
            merged.len(),
            2,
            "相邻 span 两者都保留(spans_overlap 严格 strict-less)"
        );
        let kinds: Vec<&str> = merged.iter().map(|f| f.kind).collect();
        assert_eq!(kinds, ["email", "private_person"]);
    }

    /// Risk is counted once when spans overlap, and twice when they do not.
    #[test]
    fn merge_no_double_weighting_on_overlap() {
        let hard = [h("email", 73, 109, 10)];

        let overlapping = [m("private_email", 73, 109, 1.0, 10)];
        assert_eq!(
            total_risk(&merge_findings(&hard, &overlapping)),
            10,
            "重叠时 risk 只计 Hard 一次,不应 Hard+Model 双加为 20"
        );

        let disjoint = [m("private_email", 200, 220, 1.0, 10)];
        assert_eq!(
            total_risk(&merge_findings(&hard, &disjoint)),
            20,
            "非重叠时 Hard + Model 正常累加"
        );
    }

    /// Mixed scenario: one hard email plus six model findings, one of which collides.
    #[test]
    fn merge_iss_022_medium_sample_scenario() {
        let hard = [h("email", 73, 109, 10)];
        let model = [
            m("private_person", 0, 13, 0.99, 5),
            m("private_date", 26, 36, 0.98, 5),
            m("private_person", 45, 70, 0.97, 5),
            m("private_email", 73, 109, 1.0, 10),
            m("private_phone", 117, 135, 1.0, 10),
            m("private_address", 157, 201, 0.99, 5),
        ];
        let merged = merge_findings(&hard, &model);
        assert_eq!(
            merged.len(),
            6,
            "合并后 6 条(Hard 1 + Model 5,private_email drop)"
        );
        // The colliding model finding is gone, the hard one is still present.
        assert!(merged.iter().all(|f| f.kind != "private_email"));
        assert!(merged
            .iter()
            .any(|f| f.source == FindingSource::Hard && f.kind == "email"));

        let starts: Vec<usize> = merged.iter().map(|f| f.span.0).collect();
        assert_eq!(starts, [0, 26, 45, 73, 117, 157]);

        assert_eq!(total_risk(&merged), 40);
    }

    /// Merging leaves both input collections exactly as they were.
    #[test]
    fn merge_does_not_mutate_inputs() {
        let hard = vec![h("email", 10, 20, 10)];
        let model = vec![m("private_email", 10, 20, 1.0, 10)];
        let (hard_snapshot, model_snapshot) = (hard.clone(), model.clone());
        let _merged = merge_findings(&hard, &model);
        assert_eq!(hard, hard_snapshot);
        assert_eq!(model, model_snapshot);
    }

    /// Golden table: every hard kind and the privacy label it is expected to map to.
    const HARD_KIND_TO_LABEL: &[(&str, PrivacyLabel)] = &[
        ("aws_access_key_id", PrivacyLabel::Secret),
        ("github_token", PrivacyLabel::Secret),
        ("anthropic_api_key", PrivacyLabel::Secret),
        ("openai_api_key", PrivacyLabel::Secret),
        ("jwt", PrivacyLabel::Secret),
        ("pem_private_key", PrivacyLabel::Secret),
        ("env_assignment", PrivacyLabel::Secret),
        ("slack_webhook", PrivacyLabel::Secret),
        ("stripe_secret_key", PrivacyLabel::Secret),
        ("google_api_key", PrivacyLabel::Secret),
        ("gitlab_pat", PrivacyLabel::Secret),
        ("database_url", PrivacyLabel::Secret),
        ("email", PrivacyLabel::Email),
        ("internal_ipv4", PrivacyLabel::Url),
    ];

    /// Model-side kind used as the counterpart of a given hard kind in the tests below.
    fn paired_model_kind(hard_kind: &str) -> &'static str {
        if hard_kind == "email" {
            "private_email"
        } else if hard_kind == "internal_ipv4" {
            "private_url"
        } else {
            "secret"
        }
    }

    /// `PrivacyLabel::from_kind` agrees with the golden table for every hard kind.
    #[test]
    fn iss_021_hard_kind_to_privacy_label_golden() {
        for (kind, expected) in HARD_KIND_TO_LABEL {
            assert_eq!(
                PrivacyLabel::from_kind(kind),
                Some(*expected),
                "Hard kind {kind:?} 应映射到 {expected:?}\
                 (ADR 0013 Revised D-final-2 封闭映射;改字面量需同步 \
                 vigil-redaction::label.rs::from_kind + 本 golden 表)"
            );
        }
    }

    /// For every hard kind, a same-span model finding is dropped and risk is counted once.
    #[test]
    fn iss_021_merge_overlap_hard_wins_for_each_kind() {
        for &(kind, _) in HARD_KIND_TO_LABEL {
            let hard = [h(kind, 10, 30, 25)];
            let model = [m(paired_model_kind(kind), 10, 30, 1.0, 25)];
            let merged = merge_findings(&hard, &model);
            assert_eq!(
                merged.len(),
                1,
                "Hard kind {kind:?} 同 span 重叠 merge 必去重为 1 条"
            );
            let survivor = &merged[0];
            assert_eq!(
                survivor.source,
                FindingSource::Hard,
                "Hard kind {kind:?} 同 span 重叠应 Hard 赢(ADR 0013 D-final-1)"
            );
            assert_eq!(survivor.kind, kind);
            assert_eq!(
                survivor.risk_delta, 25,
                "Hard kind {kind:?} 重叠时 risk 只计 Hard 一次,不应 Hard+Model 双加"
            );
        }
    }

    /// For every hard kind, a disjoint model finding is kept after the hard one.
    #[test]
    fn iss_021_merge_no_overlap_both_kept_for_each_kind() {
        for &(kind, _) in HARD_KIND_TO_LABEL {
            let hard = [h(kind, 10, 30, 25)];
            let model = [m(paired_model_kind(kind), 50, 70, 1.0, 25)];
            let merged = merge_findings(&hard, &model);
            assert_eq!(
                merged.len(),
                2,
                "Hard kind {kind:?} 非重叠 merge 两者都保留(ADR 0013 D5)"
            );
            let sources: Vec<FindingSource> = merged.iter().map(|f| f.source).collect();
            assert_eq!(sources, [FindingSource::Hard, FindingSource::Model]);
        }
    }

    /// The golden table's kinds equal `crate::HARD_RULES` names plus `email` and
    /// `internal_ipv4`, and there are exactly 14 of them.
    #[test]
    fn iss_021_hard_kind_set_size_matches_redaction_rules() {
        let golden_kinds: BTreeSet<&str> = HARD_KIND_TO_LABEL
            .iter()
            .map(|&(kind_name, _)| kind_name)
            .collect();

        let mut expected_kinds: BTreeSet<&'static str> =
            crate::HARD_RULES.iter().map(|r| r.name).collect();
        // These two kinds are added on top of the `HARD_RULES` names.
        expected_kinds.extend(["email", "internal_ipv4"]);

        assert_eq!(
            golden_kinds, expected_kinds,
            "HARD_KIND_TO_LABEL 与 (HARD_RULES + email/internal_ipv4) 集合漂移;\
             检查 vigil-redaction lib.rs HARD_RULES 是否新增 / 删除了 hard rule,\
             以及 ALL_RULES 是否还独有 email/internal_ipv4(若改动需同步本表 + \
             ADR 0013 Revised 版本史)"
        );

        assert_eq!(golden_kinds.len(), 14);
    }
}