1use crate::baselines::BaselineSet;
4use crate::error::Result;
5use crate::heuristics::{
6 FeaturePolicyOverride, HeuristicAlertClass, PERSISTENT_INSTABILITY_CLUSTER,
7 PRE_FAILURE_SLOW_DRIFT, RECURRENT_BOUNDARY_APPROACH, TRANSITION_EXCURSION,
8};
9use crate::metrics::BenchmarkMetrics;
10use crate::nominal::NominalModel;
11use crate::precursor::{
12 evaluate_dsa, evaluate_dsa_with_policy, project_dsa_to_cohort, DsaConfig, DsaEvaluation,
13 DsaPolicyRuntime, RecallRescueConfig,
14};
15use crate::preprocessing::PreparedDataset;
16use crate::residual::ResidualSet;
17use crate::semiotics::{ScaffoldSemioticsArtifacts, SemanticLayer};
18use crate::signs::SignSet;
19use crate::{error::DsfbSemiconductorError, grammar::GrammarSet};
20use csv::Writer;
21use serde::Serialize;
22use std::cmp::Ordering;
23use std::collections::{BTreeMap, BTreeSet};
24use std::path::Path;
25
/// Formula text copied into `ranking_formula` of every compression-biased ranking row.
const RANKING_FORMULA: &str =
    "candidate_score = z(dsfb_raw_boundary_points) - z(dsfb_raw_violation_points) + z(ewma_alarm_points) - I(missing_fraction > 0.50) * 2.0";
/// Formula text copied into `ranking_formula` of every recall-aware ranking row.
const RECALL_AWARE_RANKING_FORMULA: &str =
    "candidate_score_recall = z(pre_failure_run_hits) + z(motif_precision_proxy) + z(ewma_alarm_points) + 0.5 * z(dsfb_raw_boundary_points) + 0.5 * z(recall_rescue_contribution) - 0.5 * z(dsfb_raw_violation_points) - I(missing_fraction > 0.50) * 2.0";
/// Formula text for the burden-aware score.
/// NOTE(review): the code that uses this constant is outside the visible section.
const BURDEN_AWARE_RANKING_FORMULA: &str =
    "candidate_score_burden = z(pre_failure_run_hits) + z(motif_precision_proxy) + 0.5 * z(dsfb_raw_boundary_points) + 0.5 * z(recall_rescue_contribution) - z(operator_burden_contribution) - 0.5 * z(dsfb_raw_violation_points) - I(missing_fraction > 0.50) * 2.0";
/// Formula text for the DSFB-aware score.
/// NOTE(review): the code that uses this constant is outside the visible section.
const DSFB_AWARE_RANKING_FORMULA: &str =
    "candidate_score_dsfb = z(pre_failure_run_hits) + z(motif_precision_proxy) + 0.5 * z(recall_rescue_contribution) + 0.5 * z(semantic_persistence_contribution) + 0.5 * z(grouped_semantic_support) + 0.25 * z(dsfb_raw_boundary_points) - z(operator_burden_contribution) - 0.5 * z(violation_overdominance_penalty) - I(missing_fraction > 0.50) * 2.0";
/// A feature whose `missing_fraction` is strictly greater than this value is penalised.
/// The formula strings above spell the same number out as `0.50`; keep them in sync.
const MISSINGNESS_PENALTY_THRESHOLD: f64 = 0.50;
/// Amount subtracted from the candidate score when the missingness penalty applies.
/// The formula strings above spell the same number out as `2.0`; keep them in sync.
const MISSINGNESS_PENALTY_VALUE: f64 = 2.0;

// NOTE(review): every constant below is consumed outside the visible section; the
// remarks are limited to what the names and values show and should be confirmed
// against the call sites.

// Presumably the number of failure runs by which recall may trail the baseline.
const RECALL_TOLERANCE: usize = 1;
// Delta targets; both currently hold the same value.
const PRIMARY_DELTA_TARGET: f64 = 0.40;
const SECONDARY_DELTA_TARGET: f64 = 0.40;
// Candidate values for the grid sweep (corroboration count, window, persistence, tau).
const CORROBORATION_SWEEP: &[usize] = &[1, 2, 3, 5];
const DSA_WINDOW_SWEEP: &[usize] = &[5, 10, 15];
const DSA_PERSISTENCE_SWEEP: &[usize] = &[2, 3, 4];
const DSA_TAU_SWEEP: &[f64] = &[2.0, 2.5, 3.0];
// Fixed score values, presumably used by the rating forecast.
const CURRENT_BASELINE_SCORE: f64 = 8.1;
const FORECAST_PRIMARY_ONLY: f64 = 8.8;
const FORECAST_PRIMARY_PLUS_SECONDARY: f64 = 9.1;
const FORECAST_RECALL_SHORTFALL_VALUE: f64 = 8.3;
// Feature names, presumably looked up in the ranking for the seed-feature report.
const SEED_FEATURES: &[&str] = &["S059", "S044", "S061", "S222", "S354", "S173"];
// Parameters for the optimisation-time rescue and override logic.
const OPTIMIZATION_RESCUE_WINDOW: usize = 5;
const OPTIMIZATION_RESCUE_MIN_HITS: usize = 4;
const OPTIMIZATION_RESCUE_FRAGMENTATION: f64 = 0.5;
const OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS: f64 = 0.05;
const OPERATOR_DELTA_THRESHOLD: f64 = 0.40;
const MAX_FAILURE_DRIVEN_NUISANCE_OVERRIDES: usize = 5;
const MAX_FAILURE_DRIVEN_ISOLATED_NUISANCE_OVERRIDES: usize = 3;
55
/// One ranked feature together with the raw counts, z-scores and penalty that
/// produced its candidate score.
///
/// The same row type is shared by several ranking strategies; inputs that a
/// strategy does not use are left as `None` (the compression-biased builder, for
/// example, sets every `Option` z-score and contribution field to `None`).
/// Field order is the serialized column order, so do not reorder fields casually.
#[derive(Debug, Clone, Serialize)]
pub struct FeatureRankingRow {
    /// Strategy label, e.g. `"compression_biased"` or `"recall_aware"`.
    pub ranking_strategy: String,
    /// Human-readable formula text for the strategy that built this row.
    pub ranking_formula: String,
    pub feature_index: usize,
    pub feature_name: String,
    // Raw per-feature counts copied from the benchmark metrics.
    pub dsfb_raw_boundary_points: usize,
    pub dsfb_persistent_boundary_points: usize,
    pub dsfb_raw_violation_points: usize,
    pub dsfb_persistent_violation_points: usize,
    pub ewma_alarm_points: usize,
    pub threshold_alarm_points: usize,
    pub pre_failure_run_hits: usize,
    pub motif_precision_proxy: Option<f64>,
    // Strategy-specific inputs; `None` when the strategy does not use them.
    pub recall_rescue_contribution: Option<f64>,
    pub operator_burden_contribution: Option<f64>,
    pub semantic_persistence_contribution: Option<f64>,
    pub grouped_semantic_support: Option<f64>,
    pub violation_overdominance_penalty: Option<f64>,
    pub missing_fraction: f64,
    // Z-scores of the strategy-specific inputs; `None` when unused.
    pub z_pre_failure_run_hits: Option<f64>,
    pub z_motif_precision_proxy: Option<f64>,
    pub z_recall_rescue_contribution: Option<f64>,
    pub z_operator_burden_contribution: Option<f64>,
    pub z_semantic_persistence_contribution: Option<f64>,
    pub z_grouped_semantic_support: Option<f64>,
    pub z_violation_overdominance_penalty: Option<f64>,
    /// Z-score of `dsfb_raw_boundary_points` across the analyzable features.
    pub z_boundary: f64,
    /// Z-score of `dsfb_raw_violation_points` across the analyzable features.
    pub z_violation: f64,
    /// Z-score of `ewma_alarm_points` across the analyzable features.
    pub z_ewma: f64,
    /// Penalty subtracted from the score: the penalty constant or `0.0`.
    pub missingness_penalty: f64,
    /// Final score used for sorting; higher scores rank first.
    pub candidate_score: f64,
    /// Formatted text listing the terms that make up `candidate_score`.
    pub score_breakdown: String,
    /// 1-based position after sorting (built as `0`, then overwritten).
    pub rank: usize,
}
91
/// Per-feature comparison of rank and score under the compression, recall-aware,
/// burden-aware and DSFB-aware rankings.
///
/// NOTE(review): rows are built outside the visible section. The `rank_delta_*`
/// fields presumably hold "other rank minus compression rank", as their names
/// say; confirm at the construction site.
#[derive(Debug, Clone, Serialize)]
pub struct FeatureRankingComparisonRow {
    pub feature_index: usize,
    pub feature_name: String,
    pub compression_rank: Option<usize>,
    pub recall_aware_rank: Option<usize>,
    pub burden_aware_rank: Option<usize>,
    pub dsfb_aware_rank: Option<usize>,
    pub compression_score: Option<f64>,
    pub recall_aware_score: Option<f64>,
    pub burden_aware_score: Option<f64>,
    pub dsfb_aware_score: Option<f64>,
    pub rank_delta_recall_minus_compression: Option<i64>,
    pub rank_delta_burden_minus_compression: Option<i64>,
    pub rank_delta_dsfb_aware_minus_compression: Option<i64>,
}
108
/// A feature selected into a cohort, with its ranking score, alarm counts and a
/// textual reason for inclusion.
///
/// NOTE(review): members are built outside the visible section; the counts
/// presumably mirror the matching `FeatureRankingRow` values.
#[derive(Debug, Clone, Serialize)]
pub struct CohortMember {
    pub feature_index: usize,
    pub feature_name: String,
    pub ranking_score: f64,
    pub dsfb_boundary_points: usize,
    pub dsfb_violation_points: usize,
    pub ewma_alarm_points: usize,
    pub threshold_alarm_points: usize,
    pub missing_fraction: f64,
    pub reason_for_inclusion: String,
}
121
/// Status of one requested seed feature: whether it was found in the ranking,
/// where it ranked, and whether it falls in the top-4, top-8 and top-16 cohorts.
///
/// `rank` and `candidate_score` are optional.
/// NOTE(review): presumably they are `None` when `found_in_ranking` is false;
/// confirm where the report is built.
#[derive(Debug, Clone, Serialize)]
pub struct SeedFeatureReport {
    pub feature_name: String,
    pub found_in_ranking: bool,
    pub rank: Option<usize>,
    pub candidate_score: Option<f64>,
    pub in_top_4: bool,
    pub in_top_8: bool,
    pub in_top_16: bool,
    pub top_4_note: String,
    pub top_8_note: String,
    pub top_16_note: String,
}
135
/// Serializable bundle of a ranking formula, the requested seed feature names
/// and one `SeedFeatureReport` per seed.
#[derive(Debug, Clone, Serialize)]
pub struct SeedFeatureCheckArtifact {
    pub ranking_formula: String,
    pub requested_seed_features: Vec<String>,
    pub seed_feature_report: Vec<SeedFeatureReport>,
}
142
/// The cohorts derived from one ranking: top-4, top-8, top-16 and the full
/// feature list, plus the ranking parameters and the seed-feature report.
///
/// NOTE(review): cohort construction is outside the visible section; the
/// `top_N` vectors presumably hold the first N ranked features.
#[derive(Debug, Clone, Serialize)]
pub struct FeatureCohorts {
    pub ranking_formula: String,
    pub missingness_penalty_threshold: f64,
    pub missingness_penalty_value: f64,
    pub top_4: Vec<CohortMember>,
    pub top_8: Vec<CohortMember>,
    pub top_16: Vec<CohortMember>,
    pub all_features: Vec<CohortMember>,
    pub seed_feature_report: Vec<SeedFeatureReport>,
}
154
/// Result row for one grid point: a cohort evaluated at one combination of
/// window, persistence runs, alert tau and corroboration count.
///
/// The fields group into configuration identity, recall and lead time, nuisance,
/// episode statistics, policy point counts, per-pass-run burden and the
/// primary-success verdict.
/// NOTE(review): rows are filled in outside the visible section; the exact
/// definition of each metric must be read at the construction site.
#[derive(Debug, Clone, Serialize)]
pub struct CohortGridResult {
    // Configuration identity.
    pub ranking_strategy: String,
    pub ranking_formula: String,
    pub grid_row_id: usize,
    pub feature_trace_config_id: usize,
    pub cohort_name: String,
    pub cohort_size: usize,
    pub window: usize,
    pub persistence_runs: usize,
    pub alert_tau: f64,
    pub corroborating_m: usize,
    pub primary_run_signal: String,
    // Recall and lead time, with comparisons against the threshold and EWMA baselines.
    pub failure_recall: usize,
    pub failure_runs: usize,
    pub failure_recall_rate: f64,
    pub threshold_recall: usize,
    pub ewma_recall: usize,
    pub failure_recall_delta_vs_threshold: i64,
    pub failure_recall_delta_vs_ewma: i64,
    pub mean_lead_time_runs: Option<f64>,
    pub median_lead_time_runs: Option<f64>,
    pub threshold_mean_lead_time_runs: Option<f64>,
    pub ewma_mean_lead_time_runs: Option<f64>,
    pub mean_lead_delta_vs_threshold_runs: Option<f64>,
    pub mean_lead_delta_vs_ewma_runs: Option<f64>,
    // Nuisance proxies and their deltas against the baselines.
    pub pass_run_nuisance_proxy: f64,
    pub numeric_pass_run_nuisance_proxy: f64,
    pub ewma_nuisance: f64,
    pub threshold_nuisance: f64,
    pub pass_run_nuisance_delta_vs_ewma: f64,
    pub pass_run_nuisance_delta_vs_threshold: f64,
    pub pass_run_nuisance_delta_vs_numeric_dsa: f64,
    // Episode statistics.
    pub raw_boundary_episode_count: usize,
    pub dsa_episode_count: usize,
    pub dsa_episodes_preceding_failure: usize,
    pub mean_dsa_episode_length_runs: Option<f64>,
    pub max_dsa_episode_length_runs: usize,
    pub compression_ratio: Option<f64>,
    pub precursor_quality: Option<f64>,
    pub non_escalating_dsa_episode_fraction: Option<f64>,
    pub feature_level_active_points: usize,
    pub feature_level_alert_points: usize,
    pub persistence_suppression_fraction: Option<f64>,
    // Policy results compared with numeric-only results.
    pub numeric_failure_recall: usize,
    pub policy_vs_numeric_recall_delta: i64,
    pub watch_point_count: usize,
    pub review_point_count: usize,
    pub escalate_point_count: usize,
    pub investigation_point_count: usize,
    pub numeric_investigation_point_count: usize,
    pub silenced_point_count: usize,
    pub rescued_point_count: usize,
    pub rescued_watch_to_review_points: usize,
    pub rescued_review_to_escalate_points: usize,
    // Burden normalised per pass run.
    pub review_escalate_points_per_pass_run: f64,
    pub numeric_alert_points_per_pass_run: f64,
    pub review_escalate_episodes_per_pass_run: f64,
    pub numeric_alert_episodes_per_pass_run: f64,
    // Verdict for the primary success condition, with a textual explanation.
    pub primary_success: bool,
    pub primary_success_reason: String,
}
217
/// Per-feature summary that combines the ranks under each strategy with the
/// policy override settings for that feature.
///
/// The `*_override` fields are optional.
/// NOTE(review): `None` presumably means "no override, use the default policy";
/// confirm against `FeaturePolicyOverride` handling outside this section.
#[derive(Debug, Clone, Serialize)]
pub struct FeaturePolicySummaryRow {
    pub feature_index: usize,
    pub feature_name: String,
    pub compression_rank: Option<usize>,
    pub recall_aware_rank: Option<usize>,
    pub burden_aware_rank: Option<usize>,
    pub dsfb_aware_rank: Option<usize>,
    pub pre_failure_run_hits: usize,
    pub motif_precision_proxy: Option<f64>,
    pub missing_fraction: f64,
    pub rescue_eligible: bool,
    pub rescue_priority: usize,
    pub alert_class_override: Option<HeuristicAlertClass>,
    pub requires_persistence_override: Option<bool>,
    pub requires_corroboration_override: Option<bool>,
    pub minimum_window_override: Option<usize>,
    pub minimum_hits_override: Option<usize>,
    pub maximum_allowed_fragmentation_override: Option<f64>,
    pub override_reason: String,
    pub allow_watch_only: Option<bool>,
    pub allow_review_without_escalate: Option<bool>,
    pub suppress_if_isolated: Option<bool>,
}
242
/// Recall-rescue outcome for one configuration: the grid parameters, the recall
/// and nuisance reached, and the counts of rescued points.
///
/// NOTE(review): built outside the visible section; the fields share names with
/// `CohortGridResult` and presumably carry the same values.
#[derive(Debug, Clone, Serialize)]
pub struct RecallRescueResultRow {
    pub ranking_strategy: String,
    pub cohort_name: String,
    pub window: usize,
    pub persistence_runs: usize,
    pub alert_tau: f64,
    pub corroborating_m: usize,
    pub failure_recall: usize,
    pub pass_run_nuisance_proxy: f64,
    pub rescued_point_count: usize,
    pub rescued_watch_to_review_points: usize,
    pub rescued_review_to_escalate_points: usize,
}
257
/// Diagnostic record for one missed failure run.
///
/// It holds a snapshot of the "nearest" feature (the `nearest_feature_*`
/// fields), boolean flags naming the possible causes of the miss, and whether
/// the failure was recovered after optimisation.
/// NOTE(review): how the nearest feature is chosen and how the cause flags are
/// set happens outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct MissedFailureDiagnosticRow {
    pub failure_run_index: usize,
    // Snapshot of the nearest feature; every field is optional.
    pub nearest_feature_name: Option<String>,
    pub nearest_feature_score: Option<f64>,
    pub nearest_feature_policy_state: Option<String>,
    pub nearest_feature_resolved_alert_class: Option<String>,
    pub nearest_feature_boundary_density_w: Option<f64>,
    pub nearest_feature_ewma_occupancy_w: Option<f64>,
    pub nearest_feature_motif_recurrence_w: Option<f64>,
    pub nearest_feature_fragmentation_proxy_w: Option<f64>,
    pub nearest_feature_consistent: Option<bool>,
    // Candidate causes of the miss, one flag per mechanism.
    pub ranking_exclusion: bool,
    pub cohort_selection: bool,
    pub policy_suppression: bool,
    pub fragmentation_ceiling: bool,
    pub directional_consistency_gate: bool,
    pub persistence_gate: bool,
    pub corroboration_threshold: bool,
    pub rescue_gate_not_activating: bool,
    pub exact_miss_rule: String,
    // Recovery outlook and outcome.
    pub bounded_rescue_would_recover: bool,
    pub recovered_after_optimization: bool,
    pub optimized_feature_name: Option<String>,
}
283
/// Links one failure run to the feature considered critical for recalling it.
///
/// The row carries that feature's ranks, structural signals, override status
/// and its recall-rescue contribution. The feature identity fields are optional.
/// NOTE(review): presumably they are `None` when no feature could be associated
/// with the failure; confirm at the construction site outside this section.
#[derive(Debug, Clone, Serialize)]
pub struct RecallCriticalFeatureRow {
    pub failure_run_index: usize,
    pub feature_index: Option<usize>,
    pub feature_name: Option<String>,
    pub compression_rank: Option<usize>,
    pub recall_aware_rank: Option<usize>,
    pub max_structural_score: Option<f64>,
    pub resolved_alert_class: Option<String>,
    pub policy_state: Option<String>,
    pub boundary_density_w: Option<f64>,
    pub ewma_occupancy_w: Option<f64>,
    pub motif_recurrence_w: Option<f64>,
    pub fragmentation_proxy_w: Option<f64>,
    pub consistent: Option<bool>,
    pub exact_miss_rule: String,
    pub feature_override_exists: bool,
    pub rescue_priority: Option<usize>,
    pub allow_review_without_escalate: Option<bool>,
    pub bounded_feature_override_would_recover: bool,
    pub recovered_after_optimization: bool,
    pub optimized_feature_name: Option<String>,
    pub recall_rescue_contribution: f64,
}
308
/// A generic named contribution record: a configuration role, a contribution
/// type, a name, a numeric value and a free-text note.
///
/// NOTE(review): the set of roles and contribution types is defined where the
/// rows are built, outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct PolicyContributionAnalysisRow {
    pub configuration_role: String,
    pub contribution_type: String,
    pub name: String,
    pub value: f64,
    pub note: String,
}
317
/// Per-motif breakdown of policy point counts for one grid row.
///
/// The row names the grid configuration, the motif and its default alert class,
/// then gives the points counted in each policy state.
/// NOTE(review): the counting rules live outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct CohortMotifPolicyContributionRow {
    pub grid_row_id: usize,
    pub cohort_name: String,
    pub cohort_size: usize,
    pub window: usize,
    pub persistence_runs: usize,
    pub alert_tau: f64,
    pub corroborating_m: usize,
    pub motif_name: String,
    pub alert_class_default: HeuristicAlertClass,
    pub watch_points: usize,
    pub review_points: usize,
    pub escalate_points: usize,
    pub silent_suppression_points: usize,
    pub pass_review_or_escalate_points: usize,
    pub pre_failure_review_or_escalate_points: usize,
}
336
/// Pairs a cohort name with the grid result chosen as best for that cohort.
///
/// NOTE(review): the selection criterion is applied outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct CohortBestRow {
    pub cohort_name: String,
    pub best_row: CohortGridResult,
}
342
/// Narrative analysis for the case where the success condition was not met.
///
/// It identifies the closest configuration, gives its nuisance and recall next
/// to the baselines, and adds free-text notes on the limiting factors.
/// NOTE(review): presumably produced only when no grid row meets the primary
/// success condition (`CohortDsaSummary::failure_analysis` is an `Option`);
/// confirm at the construction site.
#[derive(Debug, Clone, Serialize)]
pub struct CohortFailureAnalysis {
    pub closest_cohort: String,
    pub closest_grid_point: String,
    pub closest_policy_setting: String,
    pub closest_nuisance: f64,
    pub closest_recall: usize,
    pub ewma_nuisance: f64,
    pub threshold_recall: usize,
    pub limiting_factor: String,
    pub corroboration_effect: String,
    pub policy_vs_numeric_note: String,
    pub ranking_quality_note: String,
    pub all_feature_dsa_vs_cohort_note: String,
    pub best_near_success_source: String,
    pub nuisance_motif_classes: String,
    pub useful_precursor_motif_classes: String,
}
361
/// Summary of a whole grid sweep: the success definition and counts, the
/// notable rows (closest to success, best success, best precursor quality) and
/// textual conclusions.
///
/// NOTE(review): the row-selection rules are outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct CohortGridSummary {
    pub ranking_formula: String,
    pub primary_success_condition_definition: String,
    pub recall_tolerance_runs: usize,
    pub grid_point_count: usize,
    pub optimization_priority_order: Vec<String>,
    pub success_row_count: usize,
    pub any_success_row: bool,
    pub closest_to_success: Option<CohortGridResult>,
    pub best_success_row: Option<CohortGridResult>,
    pub best_precursor_quality_row: Option<CohortGridResult>,
    pub cross_feature_corroboration_effect: String,
    pub limiting_factor: String,
}
377
/// Full cohort DSA report: every grid result, the best row per cohort, the
/// notable rows, the selected configuration and an optional failure analysis.
///
/// Several fields overlap with `CohortGridSummary`.
/// NOTE(review): how the two summaries are kept consistent is decided outside
/// the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct CohortDsaSummary {
    pub ranking_formula: String,
    pub primary_success_condition: String,
    pub recall_tolerance_runs: usize,
    pub cohort_results: Vec<CohortGridResult>,
    pub best_by_cohort: Vec<CohortBestRow>,
    pub closest_to_success: Option<CohortGridResult>,
    pub best_primary_success: Option<CohortGridResult>,
    pub best_precursor_quality_row: Option<CohortGridResult>,
    pub selected_configuration: Option<CohortGridResult>,
    pub best_cohort: Option<String>,
    pub any_primary_success: bool,
    pub failure_analysis: Option<CohortFailureAnalysis>,
    pub grid_point_count: usize,
    pub optimization_priority_order: Vec<String>,
    pub cross_feature_corroboration_effect: String,
    pub limiting_factor: String,
}
397
/// In-memory result of one cohort run: both summaries, the per-motif policy
/// contributions and the `DsaEvaluation` of the selected configuration.
///
/// Unlike the row and summary types, this struct does not derive `Serialize`.
#[derive(Debug, Clone)]
pub struct CohortExecution {
    pub grid_summary: CohortGridSummary,
    pub summary: CohortDsaSummary,
    pub motif_policy_contributions: Vec<CohortMotifPolicyContributionRow>,
    pub selected_evaluation: DsaEvaluation,
}
405
/// Aggregate of everything one optimisation pass produces: the rankings and
/// cohorts per strategy, the cohort executions, the candidate sets, the
/// diagnostics and the operator-facing delta reports.
///
/// This struct does not derive `Serialize`; it is an in-memory container.
/// NOTE(review): how the `stage_a`/`stage_b` candidate lists differ from the
/// `stage1`/`stage2` lists cannot be determined from the visible section.
#[derive(Debug, Clone)]
pub struct OptimizationExecution {
    // Baseline ranking, cohorts and execution.
    pub baseline_feature_ranking: Vec<FeatureRankingRow>,
    pub baseline_feature_cohorts: FeatureCohorts,
    pub baseline_execution: CohortExecution,
    // Alternative rankings and their cohorts.
    pub recall_aware_feature_ranking: Vec<FeatureRankingRow>,
    pub burden_aware_feature_ranking: Vec<FeatureRankingRow>,
    pub dsfb_aware_feature_ranking: Vec<FeatureRankingRow>,
    pub ranking_comparison: Vec<FeatureRankingComparisonRow>,
    pub recall_aware_feature_cohorts: FeatureCohorts,
    pub burden_aware_feature_cohorts: FeatureCohorts,
    pub dsfb_aware_feature_cohorts: FeatureCohorts,
    // Per-feature policy overrides and their summary.
    pub feature_policy_overrides: Vec<FeaturePolicyOverride>,
    pub feature_policy_summary: Vec<FeaturePolicySummaryRow>,
    // Executions under each strategy.
    pub optimized_execution: CohortExecution,
    pub recall_aware_execution: CohortExecution,
    pub burden_aware_execution: CohortExecution,
    pub dsfb_aware_execution: CohortExecution,
    // Candidate sets.
    pub pareto_frontier: Vec<CohortGridResult>,
    pub stage_a_candidates: Vec<CohortGridResult>,
    pub stage_b_candidates: Vec<CohortGridResult>,
    pub stage1_candidates: Vec<CohortGridResult>,
    pub stage2_candidates: Vec<CohortGridResult>,
    // Diagnostics.
    pub recall_rescue_results: Vec<RecallRescueResultRow>,
    pub missed_failure_diagnostics: Vec<MissedFailureDiagnosticRow>,
    pub recall_critical_features: Vec<RecallCriticalFeatureRow>,
    pub policy_contribution_analysis: Vec<PolicyContributionAnalysisRow>,
    // Operator-facing baselines, targets and logs.
    pub operator_baselines: OperatorBaselines,
    pub operator_delta_targets: OperatorDeltaTargets,
    pub operator_delta_attainment_matrix: Vec<OperatorDeltaAttainmentRow>,
    pub policy_operator_burden_contributions: Vec<OperatorBurdenContributionRow>,
    pub recall_recovery_efficiency: Vec<RecallRecoveryEfficiencyRow>,
    pub single_change_iteration_log: Vec<SingleChangeIterationRow>,
    pub delta_target_assessment: DeltaTargetAssessment,
}
441
/// Compact description of one candidate configuration: its grid parameters,
/// recall, nuisance, nuisance deltas against the EWMA and current-DSA baselines,
/// and its quality metrics.
///
/// NOTE(review): the sign convention of the `delta_*` fields is set where the
/// summary is built, outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct DeltaCandidateSummary {
    pub configuration: String,
    pub ranking_strategy: String,
    pub cohort_name: String,
    pub window: usize,
    pub persistence_runs: usize,
    pub alert_tau: f64,
    pub corroborating_m: usize,
    pub failure_recall: usize,
    pub failure_runs: usize,
    pub pass_run_nuisance_proxy: f64,
    pub delta_nuisance_vs_ewma: f64,
    pub delta_nuisance_vs_current_dsa: f64,
    pub mean_lead_time_runs: Option<f64>,
    pub precursor_quality: Option<f64>,
    pub compression_ratio: Option<f64>,
}
460
/// Assessment of the selected configuration against the primary and secondary
/// delta targets.
///
/// It records the target definitions, the baselines, the nuisance ceilings, the
/// pass/fail flags and the best alternative candidates found.
/// NOTE(review): the `103`/`104` in the field names presumably refer to
/// failure-recall counts in the benchmark data; confirm outside this section.
#[derive(Debug, Clone, Serialize)]
pub struct DeltaTargetAssessment {
    pub primary_target_definition: String,
    pub secondary_target_definition: String,
    pub ewma_nuisance_baseline: f64,
    pub current_policy_dsa_nuisance_baseline: f64,
    pub primary_delta_target: f64,
    pub secondary_delta_target: f64,
    pub primary_target_nuisance_ceiling: f64,
    pub secondary_target_nuisance_ceiling: f64,
    pub selected_configuration: DeltaCandidateSummary,
    pub primary_target_met: bool,
    pub ideal_target_met: bool,
    pub secondary_target_met: bool,
    pub mean_lead_time_ge_ewma: bool,
    pub mean_lead_time_ge_threshold: bool,
    pub best_recall_103_candidate: Option<DeltaCandidateSummary>,
    pub best_recall_104_candidate: Option<DeltaCandidateSummary>,
    pub best_secondary_target_candidate: Option<DeltaCandidateSummary>,
    pub best_stage_a_delta_candidate: Option<DeltaCandidateSummary>,
    pub best_reachable_pareto_point: DeltaCandidateSummary,
    pub assessment_note: String,
}
484
/// Operator-burden metrics for one named baseline layer: investigation points,
/// episode count, per-pass-run burden, precursor quality, recall and nuisance.
///
/// `OperatorBaselines` holds three of these: numeric-only DSA, current policy
/// DSA and raw boundary.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorBaselineLayer {
    pub name: String,
    pub investigation_points: usize,
    pub episode_count: usize,
    pub review_escalate_points_per_pass_run: f64,
    pub review_escalate_episodes_per_pass_run: f64,
    pub precursor_quality: Option<f64>,
    pub recall: usize,
    pub pass_run_nuisance_proxy: f64,
}
496
/// The operator baselines that optimised configurations are compared against.
///
/// The `*_baseline_layer` strings name the layer that supplied each baseline
/// value; the `baseline_*` fields hold the values; the three
/// `OperatorBaselineLayer` fields hold the full metrics of each candidate layer.
/// NOTE(review): the layer-to-baseline mapping is decided outside the visible
/// section.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorBaselines {
    pub investigation_baseline_layer: String,
    pub episode_baseline_layer: String,
    pub review_burden_baseline_layer: String,
    pub baseline_investigation_points: usize,
    pub baseline_episode_count: usize,
    pub baseline_review_escalate_points_per_pass_run: f64,
    pub baseline_review_escalate_episodes_per_pass_run: f64,
    pub baseline_precursor_quality: Option<f64>,
    pub baseline_recall: usize,
    pub numeric_only_dsa: OperatorBaselineLayer,
    pub current_policy_dsa: OperatorBaselineLayer,
    pub raw_boundary: OperatorBaselineLayer,
}
512
/// Operator-facing comparison of the selected (optimised) configuration with
/// the baselines.
///
/// It holds baseline and optimised burden values, their deltas, recall checks,
/// and nuisance and lead-time deltas against the EWMA and threshold baselines.
/// NOTE(review): whether a `delta_*` value is a relative reduction or an
/// absolute difference is determined where it is computed, outside this section.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorDeltaTargets {
    pub primary_success_definition: String,
    pub recall_tolerance_runs: usize,
    pub selected_configuration: DeltaCandidateSummary,
    // Baseline burden.
    pub baseline_investigation_points: usize,
    pub baseline_episode_count: usize,
    pub baseline_review_points_per_pass_run: f64,
    pub baseline_review_episodes_per_pass_run: f64,
    // Burden of the optimised configuration.
    pub optimized_review_escalate_points: usize,
    pub optimized_episode_count: usize,
    pub optimized_review_points_per_pass_run: f64,
    pub optimized_review_episodes_per_pass_run: f64,
    // Burden deltas.
    pub delta_investigation_load: f64,
    pub delta_episode_count: f64,
    pub delta_review_points_per_pass_run: f64,
    pub delta_review_episodes_per_pass_run: f64,
    pub precursor_quality_status: String,
    // Recall checks.
    pub recall_equals_threshold: bool,
    pub recall_within_tolerance: bool,
    pub recall_ge_103: bool,
    pub recall_eq_104: bool,
    // Nuisance and lead-time deltas.
    pub delta_nuisance_vs_ewma: f64,
    pub delta_nuisance_vs_threshold: f64,
    pub mean_lead_delta_vs_ewma: Option<f64>,
    pub mean_lead_delta_vs_threshold: Option<f64>,
    pub median_lead_delta_vs_ewma: Option<f64>,
    pub median_lead_delta_vs_threshold: Option<f64>,
    pub stable_precursor_lead_time_delta: Option<f64>,
}
543
/// One row of the target-attainment matrix: the deltas measured for a
/// configuration, followed by one boolean per target (A to I) saying whether
/// that target was reached.
///
/// NOTE(review): the numeric suffixes in the target names (`040`, `015`, `025`,
/// `103`, `104`) presumably encode the thresholds 0.40, 0.15, 0.25 and the
/// recall counts 103 and 104; confirm where the flags are computed.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorDeltaAttainmentRow {
    pub configuration_role: String,
    pub configuration: String,
    pub delta_investigation_load: f64,
    pub delta_episode_count: f64,
    pub delta_review_points_per_pass_run: f64,
    pub delta_review_episodes_per_pass_run: f64,
    pub precursor_quality_status: String,
    pub recall: usize,
    pub mean_lead_time_runs: Option<f64>,
    pub delta_nuisance_vs_ewma: f64,
    pub target_a_investigation_load_ge_040: bool,
    pub target_b_episode_count_ge_040: bool,
    pub target_c_review_points_per_pass_run_ge_040: bool,
    pub target_d_review_episodes_per_pass_run_ge_040: bool,
    pub target_e_precursor_quality_preserved_or_improved: bool,
    pub target_f_recall_ge_103: bool,
    pub target_g_recall_eq_104: bool,
    pub target_h_nuisance_ge_015: bool,
    pub target_h_nuisance_ge_025: bool,
    pub target_h_nuisance_ge_040: bool,
    pub target_i_stable_precursor_lead_improved: Option<bool>,
}
568
/// A named operator-burden contribution: a configuration role, a scope, a name,
/// a contribution type, a numeric value and a free-text note.
///
/// NOTE(review): the allowed scopes and types are defined where the rows are
/// built, outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorBurdenContributionRow {
    pub configuration_role: String,
    pub contribution_scope: String,
    pub name: String,
    pub contribution_type: String,
    pub value: f64,
    pub note: String,
}
578
/// Cost/benefit record for recall recovery between a baseline and an optimised
/// configuration: failures recovered, burden added, and the ratios of the two.
///
/// The ratio fields are optional.
/// NOTE(review): presumably `None` when the denominator (added burden) is zero
/// or negative, and `failure_run_index` is presumably `None` on an aggregate
/// row; confirm at the construction site.
#[derive(Debug, Clone, Serialize)]
pub struct RecallRecoveryEfficiencyRow {
    pub failure_run_index: Option<usize>,
    pub baseline_configuration: String,
    pub optimized_configuration: String,
    pub recovered_failures: i64,
    pub added_review_escalate_points: i64,
    pub added_episode_count: i64,
    pub added_review_points_per_pass_run: f64,
    pub added_review_episodes_per_pass_run: f64,
    pub added_nuisance_runs: i64,
    pub recovered_failures_per_added_review_escalate_point: Option<f64>,
    pub recovered_failures_per_added_episode: Option<f64>,
    pub recovered_failures_per_added_pass_run_burden: Option<f64>,
    pub recovered_failures_per_added_nuisance_run: Option<f64>,
}
595
/// Log entry for one iteration of the single-change optimisation loop.
///
/// It records what was changed and why, whether the change was accepted, the
/// metrics after the change, and the deltas those metrics show.
/// NOTE(review): the reference point for the `delta_*` fields (previous
/// iteration or baseline) is set outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct SingleChangeIterationRow {
    pub iteration: usize,
    pub change_kind: String,
    pub change_target: String,
    pub reason: String,
    pub derived_from_failures: String,
    pub targets_nuisance_class: String,
    pub affected_failures: String,
    pub accepted: bool,
    // Metrics after the change.
    pub recall: usize,
    pub investigation_points: usize,
    pub episode_count: usize,
    pub precursor_quality: Option<f64>,
    pub pass_run_nuisance_proxy: f64,
    // Change in each metric.
    pub delta_recall: i64,
    pub delta_investigation_points: i64,
    pub delta_episode_count: i64,
    pub delta_precursor_quality: Option<f64>,
    pub delta_pass_run_nuisance_proxy: f64,
}
617
/// Textual forecast for one rating category: the current value, the forecast
/// value and the justification, all stored as strings.
#[derive(Debug, Clone, Serialize)]
pub struct CategoryForecast {
    pub category: String,
    pub current: String,
    pub forecast: String,
    pub justification: String,
}
625
/// Metrics quoted in support of a rating forecast: the chosen configuration's
/// DSA nuisance, recall, lead time, precursor quality and compression ratio,
/// next to the EWMA, threshold and all-feature DSA reference values.
#[derive(Debug, Clone, Serialize)]
pub struct ForecastSupportingMetrics {
    pub chosen_configuration: String,
    pub dsa_nuisance: f64,
    pub ewma_nuisance: f64,
    pub dsa_recall: usize,
    pub threshold_recall: usize,
    pub recall_tolerance_runs: usize,
    pub dsa_mean_lead_time_runs: Option<f64>,
    pub ewma_mean_lead_time_runs: Option<f64>,
    pub threshold_mean_lead_time_runs: Option<f64>,
    pub dsa_precursor_quality: Option<f64>,
    pub all_feature_dsa_precursor_quality: Option<f64>,
    pub dsa_compression_ratio: Option<f64>,
    pub all_feature_dsa_compression_ratio: Option<f64>,
}
642
/// Rating forecast for the chosen configuration.
///
/// It holds the current baseline score, the success flags, the forecast score
/// for each scenario, the achieved score, per-category forecasts and the
/// supporting metrics.
/// NOTE(review): the score fields are presumably filled from the module-level
/// forecast constants; confirm at the construction site outside this section.
#[derive(Debug, Clone, Serialize)]
pub struct RatingDeltaForecast {
    pub current_baseline_score: f64,
    pub primary_success_condition: String,
    pub recall_tolerance_runs: usize,
    pub chosen_configuration: String,
    pub primary_success_met: bool,
    pub secondary_targets_met: bool,
    pub secondary_lead_time_vs_ewma: bool,
    pub secondary_lead_time_vs_threshold: bool,
    pub secondary_precursor_quality_vs_all_feature_dsa: Option<bool>,
    pub secondary_compression_material: Option<bool>,
    pub forecast_score_if_primary_success_only: f64,
    pub forecast_score_if_primary_plus_secondary_success: f64,
    pub achieved_forecast_score: f64,
    pub forecast_justification: String,
    pub category_forecasts: Vec<CategoryForecast>,
    pub supporting_metrics: ForecastSupportingMetrics,
}
662
/// Explanation of a missed rating target: the closest configuration, its
/// nuisance and recall next to the baselines, the remaining gaps and the
/// limiting factor.
///
/// NOTE(review): the sign convention of `nuisance_gap` and `recall_gap_runs` is
/// set where they are computed, outside the visible section.
#[derive(Debug, Clone, Serialize)]
pub struct RatingDeltaFailureAnalysis {
    pub closest_configuration: String,
    pub dsa_nuisance: f64,
    pub ewma_nuisance: f64,
    pub dsa_recall: usize,
    pub threshold_recall: usize,
    pub recall_tolerance_runs: usize,
    pub nuisance_gap: f64,
    pub recall_gap_runs: i64,
    pub nuisance_missed_by: String,
    pub recall_preserved: bool,
    pub limiting_factor: String,
}
677
678pub fn compute_feature_ranking(metrics: &BenchmarkMetrics) -> Vec<FeatureRankingRow> {
679 let analyzable = metrics
680 .feature_metrics
681 .iter()
682 .filter(|feature| feature.analyzable)
683 .collect::<Vec<_>>();
684 if analyzable.is_empty() {
685 return Vec::new();
686 }
687
688 let boundary_values = analyzable
689 .iter()
690 .map(|feature| feature.dsfb_raw_boundary_points as f64)
691 .collect::<Vec<_>>();
692 let violation_values = analyzable
693 .iter()
694 .map(|feature| feature.dsfb_raw_violation_points as f64)
695 .collect::<Vec<_>>();
696 let ewma_values = analyzable
697 .iter()
698 .map(|feature| feature.ewma_alarm_points as f64)
699 .collect::<Vec<_>>();
700
701 let (boundary_mean, boundary_std) = mean_std(&boundary_values);
702 let (violation_mean, violation_std) = mean_std(&violation_values);
703 let (ewma_mean, ewma_std) = mean_std(&ewma_values);
704
705 let mut ranking = analyzable
706 .iter()
707 .map(|feature| {
708 let z_boundary = z_score(
709 feature.dsfb_raw_boundary_points as f64,
710 boundary_mean,
711 boundary_std,
712 );
713 let z_violation = z_score(
714 feature.dsfb_raw_violation_points as f64,
715 violation_mean,
716 violation_std,
717 );
718 let z_ewma = z_score(feature.ewma_alarm_points as f64, ewma_mean, ewma_std);
719 let missingness_penalty = if feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD {
720 MISSINGNESS_PENALTY_VALUE
721 } else {
722 0.0
723 };
724 let candidate_score = z_boundary - z_violation + z_ewma - missingness_penalty;
725
726 FeatureRankingRow {
727 ranking_strategy: "compression_biased".into(),
728 ranking_formula: RANKING_FORMULA.into(),
729 feature_index: feature.feature_index,
730 feature_name: feature.feature_name.clone(),
731 dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
732 dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
733 dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
734 dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
735 ewma_alarm_points: feature.ewma_alarm_points,
736 threshold_alarm_points: feature.threshold_alarm_points,
737 pre_failure_run_hits: feature.pre_failure_run_hits,
738 motif_precision_proxy: feature.motif_precision_proxy,
739 recall_rescue_contribution: None,
740 operator_burden_contribution: None,
741 semantic_persistence_contribution: None,
742 grouped_semantic_support: None,
743 violation_overdominance_penalty: None,
744 missing_fraction: feature.missing_fraction,
745 z_pre_failure_run_hits: None,
746 z_motif_precision_proxy: None,
747 z_recall_rescue_contribution: None,
748 z_operator_burden_contribution: None,
749 z_semantic_persistence_contribution: None,
750 z_grouped_semantic_support: None,
751 z_violation_overdominance_penalty: None,
752 z_boundary,
753 z_violation,
754 z_ewma,
755 missingness_penalty,
756 candidate_score,
757 score_breakdown: format!(
758 "{:+.4} boundary - {:+.4} violation + {:+.4} ewma - {:.1} missingness",
759 z_boundary, z_violation, z_ewma, missingness_penalty
760 ),
761 rank: 0,
762 }
763 })
764 .collect::<Vec<_>>();
765
766 ranking.sort_by(|left, right| {
767 right
768 .candidate_score
769 .partial_cmp(&left.candidate_score)
770 .unwrap_or(Ordering::Equal)
771 .then_with(|| left.feature_name.cmp(&right.feature_name))
772 });
773
774 for (index, row) in ranking.iter_mut().enumerate() {
775 row.rank = index + 1;
776 }
777
778 ranking
779}
780
781pub fn compute_feature_ranking_recall_aware(
782 metrics: &BenchmarkMetrics,
783 recall_rescue_contributions: &BTreeMap<usize, f64>,
784) -> Vec<FeatureRankingRow> {
785 let analyzable = metrics
786 .feature_metrics
787 .iter()
788 .filter(|feature| feature.analyzable)
789 .collect::<Vec<_>>();
790 if analyzable.is_empty() {
791 return Vec::new();
792 }
793
794 let pre_failure_values = analyzable
795 .iter()
796 .map(|feature| feature.pre_failure_run_hits as f64)
797 .collect::<Vec<_>>();
798 let motif_precision_values = analyzable
799 .iter()
800 .map(|feature| feature.motif_precision_proxy.unwrap_or(0.0))
801 .collect::<Vec<_>>();
802 let ewma_values = analyzable
803 .iter()
804 .map(|feature| feature.ewma_alarm_points as f64)
805 .collect::<Vec<_>>();
806 let boundary_values = analyzable
807 .iter()
808 .map(|feature| feature.dsfb_raw_boundary_points as f64)
809 .collect::<Vec<_>>();
810 let violation_values = analyzable
811 .iter()
812 .map(|feature| feature.dsfb_raw_violation_points as f64)
813 .collect::<Vec<_>>();
814 let recall_rescue_values = analyzable
815 .iter()
816 .map(|feature| {
817 recall_rescue_contributions
818 .get(&feature.feature_index)
819 .copied()
820 .unwrap_or(0.0)
821 })
822 .collect::<Vec<_>>();
823
824 let (pre_failure_mean, pre_failure_std) = mean_std(&pre_failure_values);
825 let (motif_precision_mean, motif_precision_std) = mean_std(&motif_precision_values);
826 let (ewma_mean, ewma_std) = mean_std(&ewma_values);
827 let (boundary_mean, boundary_std) = mean_std(&boundary_values);
828 let (violation_mean, violation_std) = mean_std(&violation_values);
829 let (recall_rescue_mean, recall_rescue_std) = mean_std(&recall_rescue_values);
830
831 let mut ranking = analyzable
832 .iter()
833 .map(|feature| {
834 let z_pre_failure_run_hits = z_score(
835 feature.pre_failure_run_hits as f64,
836 pre_failure_mean,
837 pre_failure_std,
838 );
839 let z_motif_precision_proxy = z_score(
840 feature.motif_precision_proxy.unwrap_or(0.0),
841 motif_precision_mean,
842 motif_precision_std,
843 );
844 let z_ewma = z_score(feature.ewma_alarm_points as f64, ewma_mean, ewma_std);
845 let z_boundary = z_score(
846 feature.dsfb_raw_boundary_points as f64,
847 boundary_mean,
848 boundary_std,
849 );
850 let z_violation = z_score(
851 feature.dsfb_raw_violation_points as f64,
852 violation_mean,
853 violation_std,
854 );
855 let recall_rescue_contribution = recall_rescue_contributions
856 .get(&feature.feature_index)
857 .copied()
858 .unwrap_or(0.0);
859 let z_recall_rescue_contribution = z_score(
860 recall_rescue_contribution,
861 recall_rescue_mean,
862 recall_rescue_std,
863 );
864 let missingness_penalty = if feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD {
865 MISSINGNESS_PENALTY_VALUE
866 } else {
867 0.0
868 };
869 let candidate_score = z_pre_failure_run_hits
870 + z_motif_precision_proxy
871 + z_ewma
872 + 0.5 * z_boundary
873 + 0.5 * z_recall_rescue_contribution
874 - 0.5 * z_violation
875 - missingness_penalty;
876
877 FeatureRankingRow {
878 ranking_strategy: "recall_aware".into(),
879 ranking_formula: RECALL_AWARE_RANKING_FORMULA.into(),
880 feature_index: feature.feature_index,
881 feature_name: feature.feature_name.clone(),
882 dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
883 dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
884 dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
885 dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
886 ewma_alarm_points: feature.ewma_alarm_points,
887 threshold_alarm_points: feature.threshold_alarm_points,
888 pre_failure_run_hits: feature.pre_failure_run_hits,
889 motif_precision_proxy: feature.motif_precision_proxy,
890 recall_rescue_contribution: Some(recall_rescue_contribution),
891 operator_burden_contribution: None,
892 semantic_persistence_contribution: None,
893 grouped_semantic_support: None,
894 violation_overdominance_penalty: None,
895 missing_fraction: feature.missing_fraction,
896 z_pre_failure_run_hits: Some(z_pre_failure_run_hits),
897 z_motif_precision_proxy: Some(z_motif_precision_proxy),
898 z_recall_rescue_contribution: Some(z_recall_rescue_contribution),
899 z_operator_burden_contribution: None,
900 z_semantic_persistence_contribution: None,
901 z_grouped_semantic_support: None,
902 z_violation_overdominance_penalty: None,
903 z_boundary,
904 z_violation,
905 z_ewma,
906 missingness_penalty,
907 candidate_score,
908 score_breakdown: format!(
909 "{:+.4} pre_failure + {:+.4} motif_precision + {:+.4} ewma + 0.5*{:+.4} boundary + 0.5*{:+.4} recall_rescue - 0.5*{:+.4} violation - {:.1} missingness",
910 z_pre_failure_run_hits,
911 z_motif_precision_proxy,
912 z_ewma,
913 z_boundary,
914 z_recall_rescue_contribution,
915 z_violation,
916 missingness_penalty
917 ),
918 rank: 0,
919 }
920 })
921 .collect::<Vec<_>>();
922
923 ranking.sort_by(|left, right| {
924 right
925 .candidate_score
926 .partial_cmp(&left.candidate_score)
927 .unwrap_or(Ordering::Equal)
928 .then_with(|| left.feature_name.cmp(&right.feature_name))
929 });
930
931 for (index, row) in ranking.iter_mut().enumerate() {
932 row.rank = index + 1;
933 }
934
935 ranking
936}
937
938pub fn compute_feature_ranking_burden_aware(
939 metrics: &BenchmarkMetrics,
940 recall_rescue_contributions: &BTreeMap<usize, f64>,
941 operator_burden_contributions: &BTreeMap<usize, f64>,
942) -> Vec<FeatureRankingRow> {
943 let analyzable = metrics
944 .feature_metrics
945 .iter()
946 .filter(|feature| feature.analyzable)
947 .collect::<Vec<_>>();
948 if analyzable.is_empty() {
949 return Vec::new();
950 }
951
952 let pre_failure_values = analyzable
953 .iter()
954 .map(|feature| feature.pre_failure_run_hits as f64)
955 .collect::<Vec<_>>();
956 let motif_precision_values = analyzable
957 .iter()
958 .map(|feature| feature.motif_precision_proxy.unwrap_or(0.0))
959 .collect::<Vec<_>>();
960 let boundary_values = analyzable
961 .iter()
962 .map(|feature| feature.dsfb_raw_boundary_points as f64)
963 .collect::<Vec<_>>();
964 let violation_values = analyzable
965 .iter()
966 .map(|feature| feature.dsfb_raw_violation_points as f64)
967 .collect::<Vec<_>>();
968 let recall_rescue_values = analyzable
969 .iter()
970 .map(|feature| {
971 recall_rescue_contributions
972 .get(&feature.feature_index)
973 .copied()
974 .unwrap_or(0.0)
975 })
976 .collect::<Vec<_>>();
977 let operator_burden_values = analyzable
978 .iter()
979 .map(|feature| {
980 operator_burden_contributions
981 .get(&feature.feature_index)
982 .copied()
983 .unwrap_or(0.0)
984 })
985 .collect::<Vec<_>>();
986
987 let (pre_failure_mean, pre_failure_std) = mean_std(&pre_failure_values);
988 let (motif_precision_mean, motif_precision_std) = mean_std(&motif_precision_values);
989 let (boundary_mean, boundary_std) = mean_std(&boundary_values);
990 let (violation_mean, violation_std) = mean_std(&violation_values);
991 let (recall_rescue_mean, recall_rescue_std) = mean_std(&recall_rescue_values);
992 let (operator_burden_mean, operator_burden_std) = mean_std(&operator_burden_values);
993
994 let mut ranking = analyzable
995 .iter()
996 .map(|feature| {
997 let recall_rescue_contribution = recall_rescue_contributions
998 .get(&feature.feature_index)
999 .copied()
1000 .unwrap_or(0.0);
1001 let operator_burden_contribution = operator_burden_contributions
1002 .get(&feature.feature_index)
1003 .copied()
1004 .unwrap_or(0.0);
1005 let z_pre_failure_run_hits = z_score(
1006 feature.pre_failure_run_hits as f64,
1007 pre_failure_mean,
1008 pre_failure_std,
1009 );
1010 let z_motif_precision_proxy = z_score(
1011 feature.motif_precision_proxy.unwrap_or(0.0),
1012 motif_precision_mean,
1013 motif_precision_std,
1014 );
1015 let z_boundary = z_score(
1016 feature.dsfb_raw_boundary_points as f64,
1017 boundary_mean,
1018 boundary_std,
1019 );
1020 let z_violation = z_score(
1021 feature.dsfb_raw_violation_points as f64,
1022 violation_mean,
1023 violation_std,
1024 );
1025 let z_recall_rescue_contribution = z_score(
1026 recall_rescue_contribution,
1027 recall_rescue_mean,
1028 recall_rescue_std,
1029 );
1030 let z_operator_burden_contribution = z_score(
1031 operator_burden_contribution,
1032 operator_burden_mean,
1033 operator_burden_std,
1034 );
1035 let missingness_penalty = if feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD {
1036 MISSINGNESS_PENALTY_VALUE
1037 } else {
1038 0.0
1039 };
1040 let candidate_score = z_pre_failure_run_hits
1041 + z_motif_precision_proxy
1042 + 0.5 * z_boundary
1043 + 0.5 * z_recall_rescue_contribution
1044 - z_operator_burden_contribution
1045 - 0.5 * z_violation
1046 - missingness_penalty;
1047
1048 FeatureRankingRow {
1049 ranking_strategy: "burden_aware".into(),
1050 ranking_formula: BURDEN_AWARE_RANKING_FORMULA.into(),
1051 feature_index: feature.feature_index,
1052 feature_name: feature.feature_name.clone(),
1053 dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
1054 dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
1055 dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
1056 dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
1057 ewma_alarm_points: feature.ewma_alarm_points,
1058 threshold_alarm_points: feature.threshold_alarm_points,
1059 pre_failure_run_hits: feature.pre_failure_run_hits,
1060 motif_precision_proxy: feature.motif_precision_proxy,
1061 recall_rescue_contribution: Some(recall_rescue_contribution),
1062 operator_burden_contribution: Some(operator_burden_contribution),
1063 semantic_persistence_contribution: None,
1064 grouped_semantic_support: None,
1065 violation_overdominance_penalty: None,
1066 missing_fraction: feature.missing_fraction,
1067 z_pre_failure_run_hits: Some(z_pre_failure_run_hits),
1068 z_motif_precision_proxy: Some(z_motif_precision_proxy),
1069 z_recall_rescue_contribution: Some(z_recall_rescue_contribution),
1070 z_operator_burden_contribution: Some(z_operator_burden_contribution),
1071 z_semantic_persistence_contribution: None,
1072 z_grouped_semantic_support: None,
1073 z_violation_overdominance_penalty: None,
1074 z_boundary,
1075 z_violation,
1076 z_ewma: 0.0,
1077 missingness_penalty,
1078 candidate_score,
1079 score_breakdown: format!(
1080 "{:+.4} pre_failure + {:+.4} motif_precision + 0.5*{:+.4} boundary + 0.5*{:+.4} recall_rescue - {:+.4} burden - 0.5*{:+.4} violation - {:.1} missingness",
1081 z_pre_failure_run_hits,
1082 z_motif_precision_proxy,
1083 z_boundary,
1084 z_recall_rescue_contribution,
1085 z_operator_burden_contribution,
1086 z_violation,
1087 missingness_penalty
1088 ),
1089 rank: 0,
1090 }
1091 })
1092 .collect::<Vec<_>>();
1093
1094 ranking.sort_by(|left, right| {
1095 right
1096 .candidate_score
1097 .partial_cmp(&left.candidate_score)
1098 .unwrap_or(Ordering::Equal)
1099 .then_with(|| left.feature_name.cmp(&right.feature_name))
1100 });
1101
1102 for (index, row) in ranking.iter_mut().enumerate() {
1103 row.rank = index + 1;
1104 }
1105
1106 ranking
1107}
1108
1109pub fn compute_feature_ranking_dsfb_aware(
1110 metrics: &BenchmarkMetrics,
1111 recall_rescue_contributions: &BTreeMap<usize, f64>,
1112 operator_burden_contributions: &BTreeMap<usize, f64>,
1113 semantic_layer: &SemanticLayer,
1114 scaffold_semiotics: &ScaffoldSemioticsArtifacts,
1115) -> Vec<FeatureRankingRow> {
1116 let analyzable = metrics
1117 .feature_metrics
1118 .iter()
1119 .filter(|feature| feature.analyzable)
1120 .collect::<Vec<_>>();
1121 if analyzable.is_empty() {
1122 return Vec::new();
1123 }
1124
1125 let semantic_persistence_contributions =
1126 semantic_persistence_contribution_by_feature(semantic_layer);
1127 let grouped_semantic_support = grouped_semantic_support_by_feature(scaffold_semiotics);
1128 let pre_failure_values = analyzable
1129 .iter()
1130 .map(|feature| feature.pre_failure_run_hits as f64)
1131 .collect::<Vec<_>>();
1132 let motif_precision_values = analyzable
1133 .iter()
1134 .map(|feature| feature.motif_precision_proxy.unwrap_or(0.0))
1135 .collect::<Vec<_>>();
1136 let boundary_values = analyzable
1137 .iter()
1138 .map(|feature| feature.dsfb_raw_boundary_points as f64)
1139 .collect::<Vec<_>>();
1140 let recall_rescue_values = analyzable
1141 .iter()
1142 .map(|feature| {
1143 recall_rescue_contributions
1144 .get(&feature.feature_index)
1145 .copied()
1146 .unwrap_or(0.0)
1147 })
1148 .collect::<Vec<_>>();
1149 let operator_burden_values = analyzable
1150 .iter()
1151 .map(|feature| {
1152 operator_burden_contributions
1153 .get(&feature.feature_index)
1154 .copied()
1155 .unwrap_or(0.0)
1156 })
1157 .collect::<Vec<_>>();
1158 let semantic_persistence_values = analyzable
1159 .iter()
1160 .map(|feature| {
1161 semantic_persistence_contributions
1162 .get(&feature.feature_index)
1163 .copied()
1164 .unwrap_or(0.0)
1165 })
1166 .collect::<Vec<_>>();
1167 let grouped_support_values = analyzable
1168 .iter()
1169 .map(|feature| {
1170 grouped_semantic_support
1171 .get(&feature.feature_index)
1172 .copied()
1173 .unwrap_or(0.0)
1174 })
1175 .collect::<Vec<_>>();
1176 let violation_overdominance_values = analyzable
1177 .iter()
1178 .map(|feature| {
1179 feature.dsfb_raw_violation_points as f64
1180 / feature.dsfb_raw_boundary_points.max(1) as f64
1181 })
1182 .collect::<Vec<_>>();
1183
1184 let (pre_failure_mean, pre_failure_std) = mean_std(&pre_failure_values);
1185 let (motif_precision_mean, motif_precision_std) = mean_std(&motif_precision_values);
1186 let (boundary_mean, boundary_std) = mean_std(&boundary_values);
1187 let (recall_rescue_mean, recall_rescue_std) = mean_std(&recall_rescue_values);
1188 let (operator_burden_mean, operator_burden_std) = mean_std(&operator_burden_values);
1189 let (semantic_persistence_mean, semantic_persistence_std) =
1190 mean_std(&semantic_persistence_values);
1191 let (grouped_support_mean, grouped_support_std) = mean_std(&grouped_support_values);
1192 let (violation_overdominance_mean, violation_overdominance_std) =
1193 mean_std(&violation_overdominance_values);
1194
1195 let mut ranking = analyzable
1196 .iter()
1197 .map(|feature| {
1198 let recall_rescue_contribution = recall_rescue_contributions
1199 .get(&feature.feature_index)
1200 .copied()
1201 .unwrap_or(0.0);
1202 let operator_burden_contribution = operator_burden_contributions
1203 .get(&feature.feature_index)
1204 .copied()
1205 .unwrap_or(0.0);
1206 let semantic_persistence_contribution = semantic_persistence_contributions
1207 .get(&feature.feature_index)
1208 .copied()
1209 .unwrap_or(0.0);
1210 let grouped_semantic_support = grouped_semantic_support
1211 .get(&feature.feature_index)
1212 .copied()
1213 .unwrap_or(0.0);
1214 let violation_overdominance_penalty = feature.dsfb_raw_violation_points as f64
1215 / feature.dsfb_raw_boundary_points.max(1) as f64;
1216
1217 let z_pre_failure_run_hits = z_score(
1218 feature.pre_failure_run_hits as f64,
1219 pre_failure_mean,
1220 pre_failure_std,
1221 );
1222 let z_motif_precision_proxy = z_score(
1223 feature.motif_precision_proxy.unwrap_or(0.0),
1224 motif_precision_mean,
1225 motif_precision_std,
1226 );
1227 let z_boundary = z_score(
1228 feature.dsfb_raw_boundary_points as f64,
1229 boundary_mean,
1230 boundary_std,
1231 );
1232 let z_recall_rescue_contribution = z_score(
1233 recall_rescue_contribution,
1234 recall_rescue_mean,
1235 recall_rescue_std,
1236 );
1237 let z_operator_burden_contribution = z_score(
1238 operator_burden_contribution,
1239 operator_burden_mean,
1240 operator_burden_std,
1241 );
1242 let z_semantic_persistence_contribution = z_score(
1243 semantic_persistence_contribution,
1244 semantic_persistence_mean,
1245 semantic_persistence_std,
1246 );
1247 let z_grouped_semantic_support = z_score(
1248 grouped_semantic_support,
1249 grouped_support_mean,
1250 grouped_support_std,
1251 );
1252 let z_violation_overdominance_penalty = z_score(
1253 violation_overdominance_penalty,
1254 violation_overdominance_mean,
1255 violation_overdominance_std,
1256 );
1257 let missingness_penalty = if feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD {
1258 MISSINGNESS_PENALTY_VALUE
1259 } else {
1260 0.0
1261 };
1262 let candidate_score = z_pre_failure_run_hits
1263 + z_motif_precision_proxy
1264 + 0.5 * z_recall_rescue_contribution
1265 + 0.5 * z_semantic_persistence_contribution
1266 + 0.5 * z_grouped_semantic_support
1267 + 0.25 * z_boundary
1268 - z_operator_burden_contribution
1269 - 0.5 * z_violation_overdominance_penalty
1270 - missingness_penalty;
1271
1272 FeatureRankingRow {
1273 ranking_strategy: "dsfb_aware".into(),
1274 ranking_formula: DSFB_AWARE_RANKING_FORMULA.into(),
1275 feature_index: feature.feature_index,
1276 feature_name: feature.feature_name.clone(),
1277 dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
1278 dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
1279 dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
1280 dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
1281 ewma_alarm_points: feature.ewma_alarm_points,
1282 threshold_alarm_points: feature.threshold_alarm_points,
1283 pre_failure_run_hits: feature.pre_failure_run_hits,
1284 motif_precision_proxy: feature.motif_precision_proxy,
1285 recall_rescue_contribution: Some(recall_rescue_contribution),
1286 operator_burden_contribution: Some(operator_burden_contribution),
1287 semantic_persistence_contribution: Some(semantic_persistence_contribution),
1288 grouped_semantic_support: Some(grouped_semantic_support),
1289 violation_overdominance_penalty: Some(violation_overdominance_penalty),
1290 missing_fraction: feature.missing_fraction,
1291 z_pre_failure_run_hits: Some(z_pre_failure_run_hits),
1292 z_motif_precision_proxy: Some(z_motif_precision_proxy),
1293 z_recall_rescue_contribution: Some(z_recall_rescue_contribution),
1294 z_operator_burden_contribution: Some(z_operator_burden_contribution),
1295 z_semantic_persistence_contribution: Some(z_semantic_persistence_contribution),
1296 z_grouped_semantic_support: Some(z_grouped_semantic_support),
1297 z_violation_overdominance_penalty: Some(z_violation_overdominance_penalty),
1298 z_boundary,
1299 z_violation: 0.0,
1300 z_ewma: 0.0,
1301 missingness_penalty,
1302 candidate_score,
1303 score_breakdown: format!(
1304 "{:+.4} pre_failure + {:+.4} motif_precision + 0.5*{:+.4} recall_rescue + 0.5*{:+.4} semantic_persistence + 0.5*{:+.4} grouped_support + 0.25*{:+.4} boundary - {:+.4} burden - 0.5*{:+.4} violation_overdominance - {:.1} missingness",
1305 z_pre_failure_run_hits,
1306 z_motif_precision_proxy,
1307 z_recall_rescue_contribution,
1308 z_semantic_persistence_contribution,
1309 z_grouped_semantic_support,
1310 z_boundary,
1311 z_operator_burden_contribution,
1312 z_violation_overdominance_penalty,
1313 missingness_penalty
1314 ),
1315 rank: 0,
1316 }
1317 })
1318 .collect::<Vec<_>>();
1319
1320 ranking.sort_by(|left, right| {
1321 right
1322 .candidate_score
1323 .partial_cmp(&left.candidate_score)
1324 .unwrap_or(Ordering::Equal)
1325 .then_with(|| left.feature_name.cmp(&right.feature_name))
1326 });
1327
1328 for (index, row) in ranking.iter_mut().enumerate() {
1329 row.rank = index + 1;
1330 }
1331
1332 ranking
1333}
1334
1335pub fn write_feature_ranking_csv(path: &Path, ranking: &[FeatureRankingRow]) -> Result<()> {
1336 let mut writer = Writer::from_path(path)?;
1337 writer.write_record([
1338 "ranking_strategy",
1339 "rank",
1340 "feature_index",
1341 "feature_name",
1342 "ranking_formula",
1343 "dsfb_raw_boundary_points",
1344 "dsfb_persistent_boundary_points",
1345 "dsfb_raw_violation_points",
1346 "dsfb_persistent_violation_points",
1347 "ewma_alarm_points",
1348 "threshold_alarm_points",
1349 "pre_failure_run_hits",
1350 "motif_precision_proxy",
1351 "recall_rescue_contribution",
1352 "operator_burden_contribution",
1353 "semantic_persistence_contribution",
1354 "grouped_semantic_support",
1355 "violation_overdominance_penalty",
1356 "missing_fraction",
1357 "z_pre_failure_run_hits",
1358 "z_motif_precision_proxy",
1359 "z_recall_rescue_contribution",
1360 "z_operator_burden_contribution",
1361 "z_semantic_persistence_contribution",
1362 "z_grouped_semantic_support",
1363 "z_violation_overdominance_penalty",
1364 "z_boundary",
1365 "z_violation",
1366 "z_ewma",
1367 "missingness_penalty",
1368 "candidate_score",
1369 "score_breakdown",
1370 ])?;
1371 for row in ranking {
1372 writer.write_record([
1373 row.ranking_strategy.clone(),
1374 row.rank.to_string(),
1375 row.feature_index.to_string(),
1376 row.feature_name.clone(),
1377 row.ranking_formula.clone(),
1378 row.dsfb_raw_boundary_points.to_string(),
1379 row.dsfb_persistent_boundary_points.to_string(),
1380 row.dsfb_raw_violation_points.to_string(),
1381 row.dsfb_persistent_violation_points.to_string(),
1382 row.ewma_alarm_points.to_string(),
1383 row.threshold_alarm_points.to_string(),
1384 row.pre_failure_run_hits.to_string(),
1385 format_option_csv(row.motif_precision_proxy),
1386 format_option_csv(row.recall_rescue_contribution),
1387 format_option_csv(row.operator_burden_contribution),
1388 format_option_csv(row.semantic_persistence_contribution),
1389 format_option_csv(row.grouped_semantic_support),
1390 format_option_csv(row.violation_overdominance_penalty),
1391 format!("{:.6}", row.missing_fraction),
1392 format_option_csv(row.z_pre_failure_run_hits),
1393 format_option_csv(row.z_motif_precision_proxy),
1394 format_option_csv(row.z_recall_rescue_contribution),
1395 format_option_csv(row.z_operator_burden_contribution),
1396 format_option_csv(row.z_semantic_persistence_contribution),
1397 format_option_csv(row.z_grouped_semantic_support),
1398 format_option_csv(row.z_violation_overdominance_penalty),
1399 format!("{:.6}", row.z_boundary),
1400 format!("{:.6}", row.z_violation),
1401 format!("{:.6}", row.z_ewma),
1402 format!("{:.6}", row.missingness_penalty),
1403 format!("{:.6}", row.candidate_score),
1404 row.score_breakdown.clone(),
1405 ])?;
1406 }
1407 writer.flush()?;
1408 Ok(())
1409}
1410
1411pub fn compare_feature_rankings(
1412 compression_ranking: &[FeatureRankingRow],
1413 recall_aware_ranking: &[FeatureRankingRow],
1414 burden_aware_ranking: &[FeatureRankingRow],
1415 dsfb_aware_ranking: &[FeatureRankingRow],
1416) -> Vec<FeatureRankingComparisonRow> {
1417 let compression_by_feature = compression_ranking
1418 .iter()
1419 .map(|row| (&row.feature_name, row))
1420 .collect::<BTreeMap<_, _>>();
1421 let recall_by_feature = recall_aware_ranking
1422 .iter()
1423 .map(|row| (&row.feature_name, row))
1424 .collect::<BTreeMap<_, _>>();
1425 let burden_by_feature = burden_aware_ranking
1426 .iter()
1427 .map(|row| (&row.feature_name, row))
1428 .collect::<BTreeMap<_, _>>();
1429 let dsfb_by_feature = dsfb_aware_ranking
1430 .iter()
1431 .map(|row| (&row.feature_name, row))
1432 .collect::<BTreeMap<_, _>>();
1433
1434 let mut feature_names = compression_by_feature
1435 .keys()
1436 .copied()
1437 .chain(recall_by_feature.keys().copied())
1438 .chain(burden_by_feature.keys().copied())
1439 .chain(dsfb_by_feature.keys().copied())
1440 .collect::<Vec<_>>();
1441 feature_names.sort_unstable();
1442 feature_names.dedup();
1443
1444 feature_names
1445 .into_iter()
1446 .map(|feature_name| {
1447 let compression = compression_by_feature.get(feature_name).copied();
1448 let recall = recall_by_feature.get(feature_name).copied();
1449 let burden = burden_by_feature.get(feature_name).copied();
1450 let dsfb = dsfb_by_feature.get(feature_name).copied();
1451 FeatureRankingComparisonRow {
1452 feature_index: compression
1453 .or(recall)
1454 .or(burden)
1455 .or(dsfb)
1456 .map(|row| row.feature_index)
1457 .unwrap_or_default(),
1458 feature_name: feature_name.to_string(),
1459 compression_rank: compression.map(|row| row.rank),
1460 recall_aware_rank: recall.map(|row| row.rank),
1461 burden_aware_rank: burden.map(|row| row.rank),
1462 dsfb_aware_rank: dsfb.map(|row| row.rank),
1463 compression_score: compression.map(|row| row.candidate_score),
1464 recall_aware_score: recall.map(|row| row.candidate_score),
1465 burden_aware_score: burden.map(|row| row.candidate_score),
1466 dsfb_aware_score: dsfb.map(|row| row.candidate_score),
1467 rank_delta_recall_minus_compression: match (compression, recall) {
1468 (Some(compression), Some(recall)) => {
1469 Some(recall.rank as i64 - compression.rank as i64)
1470 }
1471 _ => None,
1472 },
1473 rank_delta_burden_minus_compression: match (compression, burden) {
1474 (Some(compression), Some(burden)) => {
1475 Some(burden.rank as i64 - compression.rank as i64)
1476 }
1477 _ => None,
1478 },
1479 rank_delta_dsfb_aware_minus_compression: match (compression, dsfb) {
1480 (Some(compression), Some(dsfb)) => {
1481 Some(dsfb.rank as i64 - compression.rank as i64)
1482 }
1483 _ => None,
1484 },
1485 }
1486 })
1487 .collect()
1488}
1489
1490fn semantic_persistence_contribution_by_feature(
1491 semantic_layer: &SemanticLayer,
1492) -> BTreeMap<usize, f64> {
1493 let top_candidate_by_feature_run = semantic_layer.ranked_candidates.iter().fold(
1494 BTreeMap::<(usize, usize), (String, f64, usize)>::new(),
1495 |mut acc, row| {
1496 acc.entry((row.feature_index, row.run_index))
1497 .and_modify(|existing| {
1498 if row.rank < existing.2
1499 || (row.rank == existing.2 && row.structural_score_proxy > existing.1)
1500 {
1501 *existing = (
1502 row.heuristic_name.clone(),
1503 row.structural_score_proxy,
1504 row.rank,
1505 );
1506 }
1507 })
1508 .or_insert((
1509 row.heuristic_name.clone(),
1510 row.structural_score_proxy,
1511 row.rank,
1512 ));
1513 acc
1514 },
1515 );
1516
1517 let by_feature = top_candidate_by_feature_run.into_iter().fold(
1518 BTreeMap::<usize, Vec<(usize, String, f64)>>::new(),
1519 |mut acc, ((feature_index, run_index), (heuristic_name, structural_score, _rank))| {
1520 acc.entry(feature_index).or_default().push((
1521 run_index,
1522 heuristic_name.to_string(),
1523 structural_score,
1524 ));
1525 acc
1526 },
1527 );
1528
1529 let mut contributions = BTreeMap::<usize, f64>::new();
1530 for (feature_index, mut rows) in by_feature {
1531 rows.sort_by_key(|(run_index, _, _)| *run_index);
1532 let mut contribution = 0.0;
1533 let mut streak_len = 0usize;
1534 let mut streak_score = 0.0;
1535 let mut previous_run = None::<usize>;
1536 let mut previous_heuristic = None::<String>;
1537 for (run_index, heuristic_name, structural_score) in rows {
1538 let continues = previous_run.is_some_and(|prev| prev + 1 == run_index)
1539 && previous_heuristic
1540 .as_deref()
1541 .is_some_and(|prev| prev == heuristic_name.as_str());
1542 if continues {
1543 streak_len += 1;
1544 streak_score += structural_score;
1545 } else {
1546 if streak_len >= 2 {
1547 contribution += streak_score;
1548 }
1549 streak_len = 1;
1550 streak_score = structural_score;
1551 }
1552 previous_run = Some(run_index);
1553 previous_heuristic = Some(heuristic_name);
1554 }
1555 if streak_len >= 2 {
1556 contribution += streak_score;
1557 }
1558 contributions.insert(feature_index, contribution);
1559 }
1560 contributions
1561}
1562
1563fn grouped_semantic_support_by_feature(
1564 scaffold_semiotics: &ScaffoldSemioticsArtifacts,
1565) -> BTreeMap<usize, f64> {
1566 let feature_index_by_name = scaffold_semiotics
1567 .feature_signs
1568 .iter()
1569 .map(|row| (row.feature_name.as_str(), row.feature_index))
1570 .collect::<BTreeMap<_, _>>();
1571 let mut contributions = BTreeMap::<usize, f64>::new();
1572 for row in &scaffold_semiotics.group_semantic_matches {
1573 let participating = row
1574 .participating_features
1575 .split(',')
1576 .filter(|feature_name| !feature_name.is_empty())
1577 .collect::<Vec<_>>();
1578 if participating.is_empty() {
1579 continue;
1580 }
1581 let shared_support =
1582 row.structural_score_proxy / row.rank.max(1) as f64 / participating.len() as f64;
1583 for feature_name in participating {
1584 let Some(&feature_index) = feature_index_by_name.get(feature_name) else {
1585 continue;
1586 };
1587 *contributions.entry(feature_index).or_default() += shared_support;
1588 }
1589 }
1590 contributions
1591}
1592
1593pub fn write_operator_delta_attainment_matrix_csv(
1594 path: &Path,
1595 rows: &[OperatorDeltaAttainmentRow],
1596) -> Result<()> {
1597 let mut writer = Writer::from_path(path)?;
1598 for row in rows {
1599 writer.serialize(row)?;
1600 }
1601 writer.flush()?;
1602 Ok(())
1603}
1604
1605pub fn write_operator_burden_contributions_csv(
1606 path: &Path,
1607 rows: &[OperatorBurdenContributionRow],
1608) -> Result<()> {
1609 let mut writer = Writer::from_path(path)?;
1610 for row in rows {
1611 writer.serialize(row)?;
1612 }
1613 writer.flush()?;
1614 Ok(())
1615}
1616
1617pub fn write_recall_recovery_efficiency_csv(
1618 path: &Path,
1619 rows: &[RecallRecoveryEfficiencyRow],
1620) -> Result<()> {
1621 let mut writer = Writer::from_path(path)?;
1622 for row in rows {
1623 writer.serialize(row)?;
1624 }
1625 writer.flush()?;
1626 Ok(())
1627}
1628
1629pub fn write_single_change_iteration_log_csv(
1630 path: &Path,
1631 rows: &[SingleChangeIterationRow],
1632) -> Result<()> {
1633 let mut writer = Writer::from_path(path)?;
1634 for row in rows {
1635 writer.serialize(row)?;
1636 }
1637 writer.flush()?;
1638 Ok(())
1639}
1640
1641pub fn write_feature_ranking_comparison_csv(
1642 path: &Path,
1643 rows: &[FeatureRankingComparisonRow],
1644) -> Result<()> {
1645 let mut writer = Writer::from_path(path)?;
1646 for row in rows {
1647 writer.serialize(row)?;
1648 }
1649 writer.flush()?;
1650 Ok(())
1651}
1652
1653pub fn write_feature_policy_summary_csv(
1654 path: &Path,
1655 rows: &[FeaturePolicySummaryRow],
1656) -> Result<()> {
1657 let mut writer = Writer::from_path(path)?;
1658 for row in rows {
1659 writer.serialize(row)?;
1660 }
1661 writer.flush()?;
1662 Ok(())
1663}
1664
1665pub fn write_recall_rescue_results_csv(path: &Path, rows: &[RecallRescueResultRow]) -> Result<()> {
1666 let mut writer = Writer::from_path(path)?;
1667 for row in rows {
1668 writer.serialize(row)?;
1669 }
1670 writer.flush()?;
1671 Ok(())
1672}
1673
1674pub fn write_missed_failure_diagnostics_csv(
1675 path: &Path,
1676 rows: &[MissedFailureDiagnosticRow],
1677) -> Result<()> {
1678 let mut writer = Writer::from_path(path)?;
1679 for row in rows {
1680 writer.serialize(row)?;
1681 }
1682 writer.flush()?;
1683 Ok(())
1684}
1685
1686pub fn write_recall_critical_features_csv(
1687 path: &Path,
1688 rows: &[RecallCriticalFeatureRow],
1689) -> Result<()> {
1690 let mut writer = Writer::from_path(path)?;
1691 for row in rows {
1692 writer.serialize(row)?;
1693 }
1694 writer.flush()?;
1695 Ok(())
1696}
1697
1698pub fn write_policy_contribution_analysis_csv(
1699 path: &Path,
1700 rows: &[PolicyContributionAnalysisRow],
1701) -> Result<()> {
1702 let mut writer = Writer::from_path(path)?;
1703 for row in rows {
1704 writer.serialize(row)?;
1705 }
1706 writer.flush()?;
1707 Ok(())
1708}
1709
1710pub fn build_feature_cohorts(ranking: &[FeatureRankingRow]) -> FeatureCohorts {
1711 let ranking_formula = ranking
1712 .first()
1713 .map(|row| row.ranking_formula.clone())
1714 .unwrap_or_else(|| RANKING_FORMULA.into());
1715 let top_4 = ranking
1716 .iter()
1717 .take(4)
1718 .map(|row| cohort_member(row, "top_4"))
1719 .collect::<Vec<_>>();
1720 let top_8 = ranking
1721 .iter()
1722 .take(8)
1723 .map(|row| cohort_member(row, "top_8"))
1724 .collect::<Vec<_>>();
1725 let top_16 = ranking
1726 .iter()
1727 .take(16)
1728 .map(|row| cohort_member(row, "top_16"))
1729 .collect::<Vec<_>>();
1730 let all_features = ranking
1731 .iter()
1732 .map(|row| cohort_member(row, "all_features"))
1733 .collect::<Vec<_>>();
1734
1735 let seed_feature_report = SEED_FEATURES
1736 .iter()
1737 .map(|seed| {
1738 if let Some(row) = ranking.iter().find(|row| row.feature_name == *seed) {
1739 SeedFeatureReport {
1740 feature_name: (*seed).to_string(),
1741 found_in_ranking: true,
1742 rank: Some(row.rank),
1743 candidate_score: Some(row.candidate_score),
1744 in_top_4: row.rank <= 4,
1745 in_top_8: row.rank <= 8,
1746 in_top_16: row.rank <= 16,
1747 top_4_note: seed_membership_note(row, 4, "top_4"),
1748 top_8_note: seed_membership_note(row, 8, "top_8"),
1749 top_16_note: seed_membership_note(row, 16, "top_16"),
1750 }
1751 } else {
1752 let note =
1753 "Excluded because the feature is not analyzable in the saved run metrics."
1754 .to_string();
1755 SeedFeatureReport {
1756 feature_name: (*seed).to_string(),
1757 found_in_ranking: false,
1758 rank: None,
1759 candidate_score: None,
1760 in_top_4: false,
1761 in_top_8: false,
1762 in_top_16: false,
1763 top_4_note: note.clone(),
1764 top_8_note: note.clone(),
1765 top_16_note: note,
1766 }
1767 }
1768 })
1769 .collect::<Vec<_>>();
1770
1771 FeatureCohorts {
1772 ranking_formula,
1773 missingness_penalty_threshold: MISSINGNESS_PENALTY_THRESHOLD,
1774 missingness_penalty_value: MISSINGNESS_PENALTY_VALUE,
1775 top_4,
1776 top_8,
1777 top_16,
1778 all_features,
1779 seed_feature_report,
1780 }
1781}
1782
1783pub fn build_seed_feature_check(cohorts: &FeatureCohorts) -> SeedFeatureCheckArtifact {
1784 SeedFeatureCheckArtifact {
1785 ranking_formula: cohorts.ranking_formula.clone(),
1786 requested_seed_features: SEED_FEATURES
1787 .iter()
1788 .map(|seed| (*seed).to_string())
1789 .collect(),
1790 seed_feature_report: cohorts.seed_feature_report.clone(),
1791 }
1792}
1793
1794pub fn run_cohort_dsa_grid(
1795 dataset: &PreparedDataset,
1796 nominal: &NominalModel,
1797 residuals: &ResidualSet,
1798 signs: &SignSet,
1799 baselines: &BaselineSet,
1800 grammar: &GrammarSet,
1801 cohorts: &FeatureCohorts,
1802 pre_failure_lookback_runs: usize,
1803 metrics: &BenchmarkMetrics,
1804) -> Result<CohortExecution> {
1805 run_cohort_dsa_grid_with_policy(
1806 dataset,
1807 nominal,
1808 residuals,
1809 signs,
1810 baselines,
1811 grammar,
1812 cohorts,
1813 pre_failure_lookback_runs,
1814 metrics,
1815 &DsaPolicyRuntime::default(),
1816 "compression_biased",
1817 )
1818}
1819
1820pub fn run_cohort_dsa_grid_with_policy(
1821 dataset: &PreparedDataset,
1822 nominal: &NominalModel,
1823 residuals: &ResidualSet,
1824 signs: &SignSet,
1825 baselines: &BaselineSet,
1826 grammar: &GrammarSet,
1827 cohorts: &FeatureCohorts,
1828 pre_failure_lookback_runs: usize,
1829 metrics: &BenchmarkMetrics,
1830 policy_runtime: &DsaPolicyRuntime,
1831 ranking_strategy: &str,
1832) -> Result<CohortExecution> {
1833 let cohort_specs = [
1834 ("top_4", cohorts.top_4.as_slice()),
1835 ("top_8", cohorts.top_8.as_slice()),
1836 ("top_16", cohorts.top_16.as_slice()),
1837 ("all_features", cohorts.all_features.as_slice()),
1838 ];
1839
1840 let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;
1841 let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
1842 let mut grid_rows = Vec::new();
1843 let mut motif_policy_rows = Vec::new();
1844 let mut feature_trace_config_id = 0usize;
1845 let mut grid_row_id = 0usize;
1846
1847 for &window in DSA_WINDOW_SWEEP {
1848 for &persistence_runs in DSA_PERSISTENCE_SWEEP {
1849 for &alert_tau in DSA_TAU_SWEEP {
1850 let base_config = DsaConfig {
1851 window,
1852 persistence_runs,
1853 alert_tau,
1854 corroborating_feature_count_min: 1,
1855 };
1856 let base_evaluation = evaluate_dsa_with_policy(
1857 dataset,
1858 nominal,
1859 residuals,
1860 signs,
1861 baselines,
1862 grammar,
1863 &base_config,
1864 pre_failure_lookback_runs,
1865 policy_runtime,
1866 )?;
1867
1868 for (cohort_name, members) in cohort_specs {
1869 if members.is_empty() {
1870 continue;
1871 }
1872 let feature_indices = members
1873 .iter()
1874 .map(|member| member.feature_index)
1875 .collect::<Vec<_>>();
1876
1877 for &corroborating_m in CORROBORATION_SWEEP {
1878 if corroborating_m > feature_indices.len() {
1879 continue;
1880 }
1881 let evaluation = project_dsa_to_cohort(
1882 dataset,
1883 nominal,
1884 residuals,
1885 baselines,
1886 grammar,
1887 &base_evaluation,
1888 &feature_indices,
1889 corroborating_m,
1890 pre_failure_lookback_runs,
1891 cohort_name,
1892 )?;
1893
1894 let row = build_grid_row(
1895 grid_row_id,
1896 feature_trace_config_id,
1897 ranking_strategy,
1898 &cohorts.ranking_formula,
1899 cohort_name,
1900 members.len(),
1901 &base_config,
1902 corroborating_m,
1903 dataset,
1904 &evaluation,
1905 metrics,
1906 );
1907 motif_policy_rows.extend(build_motif_policy_rows(&row, &evaluation));
1908 grid_rows.push(row);
1909 grid_row_id += 1;
1910 }
1911 }
1912
1913 feature_trace_config_id += 1;
1914 }
1915 }
1916 }
1917
1918 let best_by_cohort = build_best_by_cohort(&grid_rows);
1919 let closest_to_success = choose_closest_to_success(&grid_rows);
1920 let best_primary_success = grid_rows
1921 .iter()
1922 .filter(|row| row.primary_success)
1923 .cloned()
1924 .min_by(compare_successful_rows);
1925 let best_precursor_quality_row = grid_rows.iter().cloned().max_by(|left, right| {
1926 compare_option_f64(left.precursor_quality, right.precursor_quality)
1927 .then_with(|| compare_successful_rows(left, right))
1928 });
1929 let any_primary_success = best_primary_success.is_some();
1930 let selected_configuration = best_primary_success
1931 .clone()
1932 .or_else(|| closest_to_success.clone());
1933 let best_cohort = selected_configuration.as_ref().map(row_label);
1934 let corroboration_effect = corroboration_effect(&grid_rows);
1935 let limiting_factor = limiting_factor_from_row(
1936 selected_configuration.as_ref(),
1937 ewma_nuisance,
1938 threshold_recall,
1939 );
1940 let failure_analysis = if any_primary_success {
1941 None
1942 } else {
1943 build_failure_analysis(
1944 &grid_rows,
1945 &motif_policy_rows,
1946 cohorts,
1947 ewma_nuisance,
1948 threshold_recall,
1949 selected_configuration.as_ref(),
1950 &corroboration_effect,
1951 &limiting_factor,
1952 )
1953 };
1954
1955 let summary = CohortDsaSummary {
1956 ranking_formula: cohorts.ranking_formula.clone(),
1957 primary_success_condition: primary_success_condition(),
1958 recall_tolerance_runs: RECALL_TOLERANCE,
1959 cohort_results: grid_rows.clone(),
1960 best_by_cohort,
1961 closest_to_success: closest_to_success.clone(),
1962 best_primary_success: best_primary_success.clone(),
1963 best_precursor_quality_row: best_precursor_quality_row.clone(),
1964 selected_configuration: selected_configuration.clone(),
1965 best_cohort,
1966 any_primary_success,
1967 failure_analysis,
1968 grid_point_count: grid_rows.len(),
1969 optimization_priority_order: optimization_priority_order(),
1970 cross_feature_corroboration_effect: corroboration_effect.clone(),
1971 limiting_factor: limiting_factor.clone(),
1972 };
1973
1974 let grid_summary = CohortGridSummary {
1975 ranking_formula: cohorts.ranking_formula.clone(),
1976 primary_success_condition_definition: primary_success_condition(),
1977 recall_tolerance_runs: RECALL_TOLERANCE,
1978 grid_point_count: grid_rows.len(),
1979 optimization_priority_order: optimization_priority_order(),
1980 success_row_count: grid_rows.iter().filter(|row| row.primary_success).count(),
1981 any_success_row: any_primary_success,
1982 closest_to_success: closest_to_success.clone(),
1983 best_success_row: best_primary_success.clone(),
1984 best_precursor_quality_row: best_precursor_quality_row,
1985 cross_feature_corroboration_effect: corroboration_effect,
1986 limiting_factor,
1987 };
1988
1989 let selected_row = selected_configuration.ok_or_else(|| {
1990 DsfbSemiconductorError::DatasetFormat("cohort grid produced no selectable row".into())
1991 })?;
1992 let selected_evaluation = rebuild_selected_evaluation(
1993 dataset,
1994 nominal,
1995 residuals,
1996 signs,
1997 baselines,
1998 grammar,
1999 cohorts,
2000 pre_failure_lookback_runs,
2001 &selected_row,
2002 )?;
2003
2004 Ok(CohortExecution {
2005 grid_summary,
2006 summary,
2007 motif_policy_contributions: motif_policy_rows,
2008 selected_evaluation,
2009 })
2010}
2011
2012pub fn run_recall_optimization(
2013 dataset: &PreparedDataset,
2014 nominal: &NominalModel,
2015 residuals: &ResidualSet,
2016 signs: &SignSet,
2017 baselines: &BaselineSet,
2018 grammar: &GrammarSet,
2019 metrics: &BenchmarkMetrics,
2020 semantic_layer: &SemanticLayer,
2021 scaffold_semiotics: &ScaffoldSemioticsArtifacts,
2022 pre_failure_lookback_runs: usize,
2023) -> Result<OptimizationExecution> {
2024 let baseline_feature_ranking = compute_feature_ranking(metrics);
2025 let baseline_feature_cohorts = build_feature_cohorts(&baseline_feature_ranking);
2026 let baseline_execution = run_cohort_dsa_grid(
2027 dataset,
2028 nominal,
2029 residuals,
2030 signs,
2031 baselines,
2032 grammar,
2033 &baseline_feature_cohorts,
2034 pre_failure_lookback_runs,
2035 metrics,
2036 )?;
2037
2038 let recall_rescue_contributions =
2039 recall_rescue_contribution_by_feature(&baseline_execution.selected_evaluation);
2040 let operator_burden_contributions =
2041 operator_burden_contribution_by_feature(dataset, &baseline_execution.selected_evaluation);
2042 let recall_aware_feature_ranking =
2043 compute_feature_ranking_recall_aware(metrics, &recall_rescue_contributions);
2044 let burden_aware_feature_ranking = compute_feature_ranking_burden_aware(
2045 metrics,
2046 &recall_rescue_contributions,
2047 &operator_burden_contributions,
2048 );
2049 let dsfb_aware_feature_ranking = compute_feature_ranking_dsfb_aware(
2050 metrics,
2051 &recall_rescue_contributions,
2052 &operator_burden_contributions,
2053 semantic_layer,
2054 scaffold_semiotics,
2055 );
2056 let ranking_comparison = compare_feature_rankings(
2057 &baseline_feature_ranking,
2058 &recall_aware_feature_ranking,
2059 &burden_aware_feature_ranking,
2060 &dsfb_aware_feature_ranking,
2061 );
2062 let recall_aware_feature_cohorts = build_feature_cohorts(&recall_aware_feature_ranking);
2063 let burden_aware_feature_cohorts = build_feature_cohorts(&burden_aware_feature_ranking);
2064 let dsfb_aware_feature_cohorts = build_feature_cohorts(&dsfb_aware_feature_ranking);
2065 let (feature_policy_overrides, single_change_iteration_log) = build_feature_policy_overrides(
2066 dataset,
2067 nominal,
2068 residuals,
2069 signs,
2070 baselines,
2071 grammar,
2072 metrics,
2073 baseline_execution
2074 .summary
2075 .selected_configuration
2076 .as_ref()
2077 .unwrap_or_else(|| {
2078 panic!("baseline cohort execution must provide a selected configuration")
2079 }),
2080 &baseline_execution.selected_evaluation,
2081 &recall_aware_feature_ranking,
2082 semantic_layer,
2083 pre_failure_lookback_runs,
2084 )?;
2085 let feature_policy_summary = build_feature_policy_summary(
2086 metrics,
2087 &baseline_feature_ranking,
2088 &recall_aware_feature_ranking,
2089 &burden_aware_feature_ranking,
2090 &dsfb_aware_feature_ranking,
2091 &feature_policy_overrides,
2092 );
2093 let policy_runtime = DsaPolicyRuntime {
2094 feature_policy_overrides: feature_policy_overrides.clone(),
2095 recall_rescue: RecallRescueConfig {
2096 enabled: true,
2097 ..RecallRescueConfig::default()
2098 },
2099 semantic_rescue_support: build_semantic_rescue_support(
2100 semantic_layer,
2101 dataset.labels.len(),
2102 ),
2103 };
2104
2105 let optimized_compression_execution = run_cohort_dsa_grid_with_policy(
2106 dataset,
2107 nominal,
2108 residuals,
2109 signs,
2110 baselines,
2111 grammar,
2112 &baseline_feature_cohorts,
2113 pre_failure_lookback_runs,
2114 metrics,
2115 &policy_runtime,
2116 "compression_biased",
2117 )?;
2118 let recall_aware_execution = run_cohort_dsa_grid_with_policy(
2119 dataset,
2120 nominal,
2121 residuals,
2122 signs,
2123 baselines,
2124 grammar,
2125 &recall_aware_feature_cohorts,
2126 pre_failure_lookback_runs,
2127 metrics,
2128 &policy_runtime,
2129 "recall_aware",
2130 )?;
2131 let burden_aware_execution = run_cohort_dsa_grid_with_policy(
2132 dataset,
2133 nominal,
2134 residuals,
2135 signs,
2136 baselines,
2137 grammar,
2138 &burden_aware_feature_cohorts,
2139 pre_failure_lookback_runs,
2140 metrics,
2141 &policy_runtime,
2142 "burden_aware",
2143 )?;
2144 let dsfb_aware_execution = run_cohort_dsa_grid_with_policy(
2145 dataset,
2146 nominal,
2147 residuals,
2148 signs,
2149 baselines,
2150 grammar,
2151 &dsfb_aware_feature_cohorts,
2152 pre_failure_lookback_runs,
2153 metrics,
2154 &policy_runtime,
2155 "dsfb_aware",
2156 )?;
2157
2158 let mut union_rows = optimized_compression_execution
2159 .summary
2160 .cohort_results
2161 .clone();
2162 union_rows.extend(recall_aware_execution.summary.cohort_results.clone());
2163 union_rows.extend(burden_aware_execution.summary.cohort_results.clone());
2164 union_rows.extend(dsfb_aware_execution.summary.cohort_results.clone());
2165 let operator_baselines =
2166 build_operator_baselines(dataset, grammar, &baseline_execution.selected_evaluation);
2167
2168 let current_policy_dsa_nuisance = baseline_execution
2169 .summary
2170 .selected_configuration
2171 .as_ref()
2172 .map(|row| row.pass_run_nuisance_proxy)
2173 .unwrap_or(
2174 metrics
2175 .summary
2176 .pass_run_dsfb_persistent_boundary_nuisance_rate,
2177 );
2178 let pareto_frontier = pareto_frontier(&union_rows);
2179 let stage_a_candidates = stage_a_candidates(
2180 &union_rows,
2181 metrics.summary.pass_run_dsfb_raw_boundary_nuisance_rate,
2182 current_policy_dsa_nuisance,
2183 );
2184 let stage_b_candidates = stage_b_candidates(
2185 &stage_a_candidates,
2186 metrics.summary.pass_run_ewma_nuisance_rate,
2187 current_policy_dsa_nuisance,
2188 );
2189 let stage1_candidates = stage1_candidates(&union_rows, &operator_baselines);
2190 let stage2_candidates = stage2_candidates(&stage1_candidates, &operator_baselines);
2191 let selected_row = stage2_candidates
2192 .first()
2193 .cloned()
2194 .or_else(|| stage1_candidates.first().cloned())
2195 .or_else(|| {
2196 choose_optimized_row(
2197 &stage_b_candidates,
2198 &union_rows,
2199 metrics.summary.pass_run_ewma_nuisance_rate,
2200 metrics.summary.failure_runs_with_preceding_threshold_signal,
2201 current_policy_dsa_nuisance,
2202 )
2203 })
2204 .ok_or_else(|| {
2205 DsfbSemiconductorError::DatasetFormat(
2206 "optimized search produced no selectable configuration".into(),
2207 )
2208 })?;
2209
2210 let selected_evaluation = rebuild_selected_evaluation_with_policy(
2211 dataset,
2212 nominal,
2213 residuals,
2214 signs,
2215 baselines,
2216 grammar,
2217 &baseline_feature_cohorts,
2218 &recall_aware_feature_cohorts,
2219 &burden_aware_feature_cohorts,
2220 &dsfb_aware_feature_cohorts,
2221 pre_failure_lookback_runs,
2222 &selected_row,
2223 &policy_runtime,
2224 )?;
2225
2226 let mut optimized_execution = match selected_row.ranking_strategy.as_str() {
2227 "recall_aware" => recall_aware_execution.clone(),
2228 "burden_aware" => burden_aware_execution.clone(),
2229 "dsfb_aware" => dsfb_aware_execution.clone(),
2230 _ => optimized_compression_execution.clone(),
2231 };
2232 optimized_execution.selected_evaluation = selected_evaluation.clone();
2233 optimized_execution.summary.selected_configuration = Some(selected_row.clone());
2234
2235 let recall_rescue_results = union_rows
2236 .iter()
2237 .map(|row| RecallRescueResultRow {
2238 ranking_strategy: row.ranking_strategy.clone(),
2239 cohort_name: row.cohort_name.clone(),
2240 window: row.window,
2241 persistence_runs: row.persistence_runs,
2242 alert_tau: row.alert_tau,
2243 corroborating_m: row.corroborating_m,
2244 failure_recall: row.failure_recall,
2245 pass_run_nuisance_proxy: row.pass_run_nuisance_proxy,
2246 rescued_point_count: row.rescued_point_count,
2247 rescued_watch_to_review_points: row.rescued_watch_to_review_points,
2248 rescued_review_to_escalate_points: row.rescued_review_to_escalate_points,
2249 })
2250 .collect::<Vec<_>>();
2251 let missed_failure_diagnostics = build_missed_failure_diagnostics(
2252 &baseline_execution.selected_evaluation,
2253 &selected_evaluation,
2254 &feature_policy_overrides,
2255 );
2256 let recall_critical_features = build_recall_critical_features(
2257 &baseline_execution.selected_evaluation,
2258 &selected_evaluation,
2259 &baseline_feature_ranking,
2260 &recall_aware_feature_ranking,
2261 &feature_policy_overrides,
2262 &recall_rescue_contributions,
2263 );
2264 let policy_contribution_analysis = build_policy_contribution_analysis(
2265 &baseline_execution.selected_evaluation,
2266 &selected_evaluation,
2267 &selected_row,
2268 );
2269 let operator_delta_targets = compute_operator_delta_targets(
2270 &selected_row,
2271 &selected_evaluation,
2272 &operator_baselines,
2273 metrics,
2274 );
2275 let operator_delta_attainment_matrix = build_operator_delta_attainment_matrix(
2276 &selected_row,
2277 &stage1_candidates,
2278 &stage2_candidates,
2279 &operator_baselines,
2280 metrics,
2281 );
2282 let policy_operator_burden_contributions = build_policy_operator_burden_contributions(
2283 dataset,
2284 &baseline_execution.selected_evaluation,
2285 &selected_evaluation,
2286 &selected_row,
2287 );
2288 let recall_recovery_efficiency = build_recall_recovery_efficiency(
2289 dataset,
2290 &baseline_execution.selected_evaluation,
2291 &selected_evaluation,
2292 pre_failure_lookback_runs,
2293 );
2294 let delta_target_assessment = compute_delta_target_assessment(
2295 &selected_row,
2296 &stage_a_candidates,
2297 &union_rows,
2298 baseline_execution
2299 .summary
2300 .selected_configuration
2301 .as_ref()
2302 .unwrap_or_else(|| {
2303 panic!("baseline cohort execution must provide a selected configuration")
2304 }),
2305 metrics,
2306 );
2307
2308 Ok(OptimizationExecution {
2309 baseline_feature_ranking,
2310 baseline_feature_cohorts,
2311 baseline_execution,
2312 recall_aware_feature_ranking,
2313 burden_aware_feature_ranking,
2314 dsfb_aware_feature_ranking,
2315 ranking_comparison,
2316 recall_aware_feature_cohorts,
2317 burden_aware_feature_cohorts,
2318 dsfb_aware_feature_cohorts,
2319 feature_policy_overrides,
2320 feature_policy_summary,
2321 optimized_execution,
2322 recall_aware_execution,
2323 burden_aware_execution,
2324 dsfb_aware_execution,
2325 pareto_frontier,
2326 stage_a_candidates,
2327 stage_b_candidates,
2328 stage1_candidates,
2329 stage2_candidates,
2330 recall_rescue_results,
2331 missed_failure_diagnostics,
2332 recall_critical_features,
2333 policy_contribution_analysis,
2334 operator_baselines,
2335 operator_delta_targets,
2336 operator_delta_attainment_matrix,
2337 policy_operator_burden_contributions,
2338 recall_recovery_efficiency,
2339 single_change_iteration_log,
2340 delta_target_assessment,
2341 })
2342}
2343
/// Per-feature aggregate of lookback-window evidence collected across missed failure runs.
///
/// `Default` yields an empty name and zeroed counters and maxima, which lets callers
/// accumulate into a fresh entry.
#[derive(Clone, Debug, Default)]
struct FailureSupportCandidate {
    /// Name of the feature this aggregate belongs to.
    feature_name: String,
    /// Number of missed failure runs for which this feature was kept as a support candidate.
    support_failure_count: usize,
    /// Largest DSA score observed across the contributing lookback windows.
    max_score: f64,
    /// Largest boundary-density value observed across the contributing lookback windows.
    max_boundary_density: f64,
    /// Largest EWMA-occupancy value observed across the contributing lookback windows.
    max_ewma_occupancy: f64,
    /// Largest motif-recurrence value observed across the contributing lookback windows.
    max_motif_recurrence: f64,
    /// Pass-run review burden recorded for the feature.
    pass_review_burden: usize,
}
2354
2355fn build_semantic_rescue_support(
2356 semantic_layer: &SemanticLayer,
2357 run_count: usize,
2358) -> BTreeMap<usize, Vec<bool>> {
2359 let mut support = BTreeMap::<usize, Vec<bool>>::new();
2360 for row in &semantic_layer.semantic_matches {
2361 if !is_strong_semantic_rescue_heuristic(&row.heuristic_name) {
2362 continue;
2363 }
2364 support
2365 .entry(row.feature_index)
2366 .or_insert_with(|| vec![false; run_count])[row.run_index] = true;
2367 }
2368 support
2369}
2370
2371fn is_strong_semantic_rescue_heuristic(heuristic_name: &str) -> bool {
2372 matches!(
2373 heuristic_name,
2374 PERSISTENT_INSTABILITY_CLUSTER
2375 | PRE_FAILURE_SLOW_DRIFT
2376 | RECURRENT_BOUNDARY_APPROACH
2377 | TRANSITION_EXCURSION
2378 )
2379}
2380
2381fn feature_review_burden_maps(
2382 dataset: &PreparedDataset,
2383 evaluation: &DsaEvaluation,
2384 pre_failure_lookback_runs: usize,
2385) -> (BTreeMap<usize, usize>, BTreeMap<usize, usize>) {
2386 let failure_indices = dataset
2387 .labels
2388 .iter()
2389 .enumerate()
2390 .filter_map(|(index, label)| (*label == 1).then_some(index))
2391 .collect::<Vec<_>>();
2392 let failure_window_mask = build_failure_window_mask(
2393 dataset.labels.len(),
2394 &failure_indices,
2395 pre_failure_lookback_runs,
2396 );
2397 let mut pass_review_burden = BTreeMap::new();
2398 let mut pre_failure_review_burden = BTreeMap::new();
2399 for trace in &evaluation.traces {
2400 let pass_count = trace
2401 .dsa_alert
2402 .iter()
2403 .enumerate()
2404 .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
2405 .count();
2406 let pre_failure_count = trace
2407 .dsa_alert
2408 .iter()
2409 .enumerate()
2410 .filter(|(run_index, flag)| failure_window_mask[*run_index] && **flag)
2411 .count();
2412 pass_review_burden.insert(trace.feature_index, pass_count);
2413 pre_failure_review_burden.insert(trace.feature_index, pre_failure_count);
2414 }
2415 (pass_review_burden, pre_failure_review_burden)
2416}
2417
/// Builds a per-run mask that is `true` for every run lying in the lookback window of at
/// least one failure.
///
/// For a failure at index `f` the window is the half-open range
/// `[f - pre_failure_lookback_runs, f)`, saturating at 0. The failure run itself is not
/// marked. Windows of different failures may overlap.
///
/// The window is clamped to `len`, so a failure index beyond the mask marks only the
/// in-range part of its window instead of panicking on the slice bounds.
fn build_failure_window_mask(
    len: usize,
    failure_indices: &[usize],
    pre_failure_lookback_runs: usize,
) -> Vec<bool> {
    let mut mask = vec![false; len];
    for &failure_index in failure_indices {
        let end = failure_index.min(len);
        let start = failure_index
            .saturating_sub(pre_failure_lookback_runs)
            .min(end);
        mask[start..end].fill(true);
    }
    mask
}
2432
2433fn failure_local_support_candidates(
2434 baseline_selected_row: &CohortGridResult,
2435 baseline_evaluation: &DsaEvaluation,
2436 semantic_layer: &SemanticLayer,
2437 pre_failure_lookback_runs: usize,
2438 pass_review_burden_by_feature: &BTreeMap<usize, usize>,
2439) -> BTreeMap<usize, FailureSupportCandidate> {
2440 let semantic_support = build_semantic_rescue_support(
2441 semantic_layer,
2442 baseline_evaluation
2443 .traces
2444 .first()
2445 .map(|trace| trace.dsa_score.len())
2446 .unwrap_or_default(),
2447 );
2448 let traces_by_feature = baseline_evaluation
2449 .traces
2450 .iter()
2451 .map(|trace| (trace.feature_index, trace))
2452 .collect::<BTreeMap<_, _>>();
2453 let mut candidates = BTreeMap::<usize, FailureSupportCandidate>::new();
2454
2455 for signal in baseline_evaluation
2456 .per_failure_run_signals
2457 .iter()
2458 .filter(|signal| signal.earliest_dsa_run.is_none())
2459 {
2460 let failure_index = signal.failure_run_index;
2461 let start = failure_index.saturating_sub(pre_failure_lookback_runs);
2462 let mut per_failure = traces_by_feature
2463 .values()
2464 .filter_map(|trace| {
2465 let semantic_hits = semantic_support
2466 .get(&trace.feature_index)
2467 .map(|flags| {
2468 flags[start..failure_index]
2469 .iter()
2470 .filter(|flag| **flag)
2471 .count()
2472 })
2473 .unwrap_or(0);
2474 if semantic_hits == 0 {
2475 return None;
2476 }
2477 let max_score = trace.dsa_score[start..failure_index]
2478 .iter()
2479 .copied()
2480 .fold(0.0, f64::max);
2481 if max_score < (baseline_selected_row.alert_tau - 1.0).max(0.0) {
2482 return None;
2483 }
2484 let pass_review_burden = pass_review_burden_by_feature
2485 .get(&trace.feature_index)
2486 .copied()
2487 .unwrap_or(0);
2488 if pass_review_burden > 12 {
2489 return None;
2490 }
2491 let max_boundary_density = trace.boundary_density_w[start..failure_index]
2492 .iter()
2493 .copied()
2494 .fold(0.0, f64::max);
2495 let max_ewma_occupancy = trace.ewma_occupancy_w[start..failure_index]
2496 .iter()
2497 .copied()
2498 .fold(0.0, f64::max);
2499 let max_motif_recurrence = trace.motif_recurrence_w[start..failure_index]
2500 .iter()
2501 .copied()
2502 .fold(0.0, f64::max);
2503 let support_score = max_score
2504 + 0.5 * max_motif_recurrence
2505 + 0.25 * max_boundary_density
2506 + 0.25 * max_ewma_occupancy;
2507 Some((
2508 trace.feature_index,
2509 trace.feature_name.clone(),
2510 semantic_hits,
2511 max_score,
2512 max_boundary_density,
2513 max_ewma_occupancy,
2514 max_motif_recurrence,
2515 pass_review_burden,
2516 support_score,
2517 ))
2518 })
2519 .collect::<Vec<_>>();
2520 per_failure.sort_by(|left, right| {
2521 right
2522 .8
2523 .partial_cmp(&left.8)
2524 .unwrap_or(Ordering::Equal)
2525 .then_with(|| left.1.cmp(&right.1))
2526 });
2527 for (
2528 feature_index,
2529 feature_name,
2530 _semantic_hits,
2531 max_score,
2532 max_boundary_density,
2533 max_ewma_occupancy,
2534 max_motif_recurrence,
2535 pass_review_burden,
2536 _support_score,
2537 ) in per_failure.into_iter().take(3)
2538 {
2539 let entry = candidates.entry(feature_index).or_default();
2540 entry.feature_name = feature_name;
2541 entry.support_failure_count += 1;
2542 entry.max_score = entry.max_score.max(max_score);
2543 entry.max_boundary_density = entry.max_boundary_density.max(max_boundary_density);
2544 entry.max_ewma_occupancy = entry.max_ewma_occupancy.max(max_ewma_occupancy);
2545 entry.max_motif_recurrence = entry.max_motif_recurrence.max(max_motif_recurrence);
2546 entry.pass_review_burden = pass_review_burden;
2547 }
2548 }
2549
2550 candidates
2551}
2552
2553fn build_feature_policy_overrides(
2554 dataset: &PreparedDataset,
2555 nominal: &NominalModel,
2556 residuals: &ResidualSet,
2557 signs: &SignSet,
2558 baselines: &BaselineSet,
2559 grammar: &GrammarSet,
2560 metrics: &BenchmarkMetrics,
2561 baseline_selected_row: &CohortGridResult,
2562 baseline_evaluation: &DsaEvaluation,
2563 recall_aware_ranking: &[FeatureRankingRow],
2564 semantic_layer: &SemanticLayer,
2565 pre_failure_lookback_runs: usize,
2566) -> Result<(Vec<FeaturePolicyOverride>, Vec<SingleChangeIterationRow>)> {
2567 let feature_metrics = metrics
2568 .feature_metrics
2569 .iter()
2570 .map(|feature| (feature.feature_index, feature))
2571 .collect::<BTreeMap<_, _>>();
2572 let recall_rank_by_feature = recall_aware_ranking
2573 .iter()
2574 .map(|row| (row.feature_index, row))
2575 .collect::<BTreeMap<_, _>>();
2576 let (pass_review_burden_by_feature, pre_failure_review_burden_by_feature) =
2577 feature_review_burden_maps(dataset, baseline_evaluation, pre_failure_lookback_runs);
2578 let support_candidates = failure_local_support_candidates(
2579 baseline_selected_row,
2580 baseline_evaluation,
2581 semantic_layer,
2582 pre_failure_lookback_runs,
2583 &pass_review_burden_by_feature,
2584 );
2585 let mut missed_feature_stats = BTreeMap::<usize, (String, usize, f64)>::new();
2586
2587 for signal in baseline_evaluation
2588 .per_failure_run_signals
2589 .iter()
2590 .filter(|signal| signal.earliest_dsa_run.is_none())
2591 {
2592 let Some(feature_index) = signal.max_dsa_score_feature_index else {
2593 continue;
2594 };
2595 let Some(feature_name) = signal.max_dsa_score_feature_name.as_ref() else {
2596 continue;
2597 };
2598 let score = signal.max_dsa_score_in_lookback.unwrap_or(0.0);
2599 let entry = missed_feature_stats
2600 .entry(feature_index)
2601 .or_insert_with(|| (feature_name.clone(), 0, 0.0));
2602 entry.1 += 1;
2603 entry.2 = entry.2.max(score);
2604 }
2605
2606 let mut overrides = missed_feature_stats
2607 .into_iter()
2608 .filter_map(|(feature_index, (feature_name, miss_count, max_score))| {
2609 let feature_metric = feature_metrics.get(&feature_index)?;
2610 let recall_rank = recall_rank_by_feature.get(&feature_index).map(|row| row.rank);
2611 let max_score_floor = baseline_selected_row.alert_tau - 0.40;
2612 if max_score < max_score_floor
2613 || feature_metric.missing_fraction > OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS
2614 || feature_metric.pre_failure_run_hits == 0
2615 || feature_metric.motif_precision_proxy.unwrap_or(0.0) <= 0.0
2616 {
2617 return None;
2618 }
2619
2620 let rescue_priority =
2621 if miss_count >= 2 || max_score >= baseline_selected_row.alert_tau - 0.10 {
2622 2
2623 } else {
2624 1
2625 };
2626 let fragmentation_override =
2627 if feature_metric.motif_precision_proxy.unwrap_or(0.0) >= 0.70
2628 && max_score >= baseline_selected_row.alert_tau - 0.10
2629 {
2630 1.0
2631 } else {
2632 OPTIMIZATION_RESCUE_FRAGMENTATION
2633 };
2634
2635 Some(FeaturePolicyOverride {
2636 feature_index,
2637 feature_name: feature_name.clone(),
2638 alert_class_override: None,
2639 requires_persistence_override: Some(false),
2640 requires_corroboration_override: Some(false),
2641 minimum_window_override: Some(OPTIMIZATION_RESCUE_WINDOW),
2642 minimum_hits_override: Some(OPTIMIZATION_RESCUE_MIN_HITS),
2643 maximum_allowed_fragmentation_override: Some(fragmentation_override),
2644 rescue_eligible: true,
2645 rescue_priority,
2646 allow_watch_only: Some(false),
2647 allow_review_without_escalate: Some(true),
2648 suppress_if_isolated: Some(false),
2649 override_reason: format!(
2650 "Feature was the nearest current-DSA miss on {} failure run(s), max near-miss score {:.4}, recall-aware rank {}, pre_failure_run_hits={}, motif_precision_proxy={}, rescue_fragmentation_ceiling={:.2}.",
2651 miss_count,
2652 max_score,
2653 recall_rank
2654 .map(|rank| rank.to_string())
2655 .unwrap_or_else(|| "n/a".into()),
2656 feature_metric.pre_failure_run_hits,
2657 format_option_f64(feature_metric.motif_precision_proxy),
2658 fragmentation_override,
2659 ),
2660 })
2661 })
2662 .collect::<Vec<_>>();
2663
2664 let existing_override_features = overrides
2665 .iter()
2666 .map(|override_entry| override_entry.feature_index)
2667 .collect::<BTreeSet<_>>();
2668
2669 let mut support_overrides = support_candidates
2670 .into_iter()
2671 .filter(|(feature_index, _)| !existing_override_features.contains(feature_index))
2672 .filter_map(|(feature_index, candidate)| {
2673 let feature_metric = feature_metrics.get(&feature_index)?;
2674 let recall_rank = recall_rank_by_feature.get(&feature_index).map(|row| row.rank);
2675 if feature_metric.missing_fraction > OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS
2676 || feature_metric.pre_failure_run_hits == 0
2677 || feature_metric.motif_precision_proxy.unwrap_or(0.0) < 0.45
2678 {
2679 return None;
2680 }
2681
2682 Some(FeaturePolicyOverride {
2683 feature_index,
2684 feature_name: candidate.feature_name.clone(),
2685 alert_class_override: Some(HeuristicAlertClass::Watch),
2686 requires_persistence_override: Some(false),
2687 requires_corroboration_override: Some(false),
2688 minimum_window_override: Some(2),
2689 minimum_hits_override: Some(1),
2690 maximum_allowed_fragmentation_override: Some(1.0),
2691 rescue_eligible: true,
2692 rescue_priority: 3,
2693 allow_watch_only: Some(false),
2694 allow_review_without_escalate: Some(true),
2695 suppress_if_isolated: Some(false),
2696 override_reason: format!(
2697 "Feature is a low-burden grammar-qualified support candidate for {} missed failure run(s), max near-miss score {:.4}, max_boundary_density={:.2}, max_ewma_occupancy={:.2}, max_motif_recurrence={:.2}, recall-aware rank {}, pass_review_burden={}.",
2698 candidate.support_failure_count,
2699 candidate.max_score,
2700 candidate.max_boundary_density,
2701 candidate.max_ewma_occupancy,
2702 candidate.max_motif_recurrence,
2703 recall_rank
2704 .map(|rank| rank.to_string())
2705 .unwrap_or_else(|| "n/a".into()),
2706 candidate.pass_review_burden,
2707 ),
2708 })
2709 })
2710 .collect::<Vec<_>>();
2711
2712 let protected_features = overrides
2713 .iter()
2714 .chain(&support_overrides)
2715 .map(|override_entry| override_entry.feature_index)
2716 .collect::<BTreeSet<_>>();
2717
2718 let mut nuisance_overrides = pass_review_burden_by_feature
2719 .iter()
2720 .filter(|(feature_index, _)| !protected_features.contains(feature_index))
2721 .filter_map(|(&feature_index, &pass_review_burden)| {
2722 let feature_metric = feature_metrics.get(&feature_index)?;
2723 let pre_failure_review_burden = pre_failure_review_burden_by_feature
2724 .get(&feature_index)
2725 .copied()
2726 .unwrap_or(0);
2727 if pass_review_burden < 300
2728 || pre_failure_review_burden.saturating_mul(20) > pass_review_burden
2729 || feature_metric.missing_fraction > OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS
2730 {
2731 return None;
2732 }
2733
2734 Some((
2735 pass_review_burden,
2736 pre_failure_review_burden,
2737 FeaturePolicyOverride {
2738 feature_index,
2739 feature_name: feature_metric.feature_name.clone(),
2740 alert_class_override: Some(HeuristicAlertClass::Watch),
2741 requires_persistence_override: Some(true),
2742 requires_corroboration_override: Some(true),
2743 minimum_window_override: Some(OPTIMIZATION_RESCUE_WINDOW),
2744 minimum_hits_override: Some(OPTIMIZATION_RESCUE_MIN_HITS),
2745 maximum_allowed_fragmentation_override: Some(
2746 OPTIMIZATION_RESCUE_FRAGMENTATION,
2747 ),
2748 rescue_eligible: false,
2749 rescue_priority: 0,
2750 allow_watch_only: Some(true),
2751 allow_review_without_escalate: Some(false),
2752 suppress_if_isolated: Some(true),
2753 override_reason: format!(
2754 "Feature dominates pass-run burden ({}) but contributes only {} pre-failure Review/Escalate points inside the fixed lookback windows; clamp to Watch and suppress if isolated.",
2755 pass_review_burden, pre_failure_review_burden,
2756 ),
2757 },
2758 ))
2759 })
2760 .collect::<Vec<_>>();
2761 nuisance_overrides.sort_by(|left, right| {
2762 right
2763 .0
2764 .cmp(&left.0)
2765 .then_with(|| left.1.cmp(&right.1))
2766 .then_with(|| left.2.feature_name.cmp(&right.2.feature_name))
2767 });
2768 let mut nuisance_overrides = nuisance_overrides
2769 .into_iter()
2770 .take(MAX_FAILURE_DRIVEN_NUISANCE_OVERRIDES)
2771 .map(|(_, _, override_entry)| override_entry)
2772 .collect::<Vec<_>>();
2773
2774 let mut iteration = 1usize;
2775 let mut current_overrides = Vec::new();
2776 let mut current_evaluation = baseline_evaluation.clone();
2777 let mut iteration_log = Vec::new();
2778
2779 overrides.sort_by(|left, right| left.feature_name.cmp(&right.feature_name));
2780 support_overrides.sort_by(|left, right| {
2781 right
2782 .rescue_priority
2783 .cmp(&left.rescue_priority)
2784 .then_with(|| left.feature_name.cmp(&right.feature_name))
2785 });
2786
2787 for candidate in overrides.into_iter().chain(support_overrides.into_iter()) {
2788 let mut candidate_overrides = current_overrides.clone();
2789 candidate_overrides.push(candidate.clone());
2790 let candidate_evaluation = evaluate_selected_row_with_overrides(
2791 dataset,
2792 nominal,
2793 residuals,
2794 signs,
2795 baselines,
2796 grammar,
2797 baseline_selected_row,
2798 baseline_evaluation,
2799 semantic_layer,
2800 pre_failure_lookback_runs,
2801 &candidate_overrides,
2802 )?;
2803 let affected_failures =
2804 newly_recovered_failures(¤t_evaluation, &candidate_evaluation);
2805 let accepted = !affected_failures.is_empty();
2806 iteration_log.push(single_change_iteration_row(
2807 iteration,
2808 "rescue_rule",
2809 &candidate.feature_name,
2810 candidate.override_reason.clone(),
2811 &affected_failures,
2812 "",
2813 accepted,
2814 ¤t_evaluation,
2815 &candidate_evaluation,
2816 ));
2817 iteration += 1;
2818 if accepted {
2819 current_overrides = candidate_overrides;
2820 current_evaluation = candidate_evaluation;
2821 }
2822 }
2823
2824 let mut protected_features = current_overrides
2825 .iter()
2826 .map(|override_entry| override_entry.feature_index)
2827 .collect::<BTreeSet<_>>();
2828 let isolated_nuisance_overrides =
2829 build_isolated_nuisance_overrides(dataset, ¤t_evaluation, &protected_features);
2830
2831 nuisance_overrides.extend(isolated_nuisance_overrides);
2832 nuisance_overrides.sort_by(|left, right| left.feature_name.cmp(&right.feature_name));
2833
2834 for candidate in nuisance_overrides {
2835 if protected_features.contains(&candidate.feature_index) {
2836 continue;
2837 }
2838 let mut candidate_overrides = current_overrides.clone();
2839 candidate_overrides.push(candidate.clone());
2840 let candidate_evaluation = evaluate_selected_row_with_overrides(
2841 dataset,
2842 nominal,
2843 residuals,
2844 signs,
2845 baselines,
2846 grammar,
2847 baseline_selected_row,
2848 baseline_evaluation,
2849 semantic_layer,
2850 pre_failure_lookback_runs,
2851 &candidate_overrides,
2852 )?;
2853 let accepted = candidate_evaluation.summary.failure_run_recall
2854 >= current_evaluation.summary.failure_run_recall
2855 && (candidate_evaluation.summary.alert_point_count
2856 < current_evaluation.summary.alert_point_count
2857 || candidate_evaluation.episode_summary.dsa_episode_count
2858 < current_evaluation.episode_summary.dsa_episode_count
2859 || compare_option_gt(
2860 candidate_evaluation.episode_summary.precursor_quality,
2861 current_evaluation.episode_summary.precursor_quality,
2862 ) == Some(true));
2863 iteration_log.push(single_change_iteration_row(
2864 iteration,
2865 "policy_constraint",
2866 &candidate.feature_name,
2867 candidate.override_reason.clone(),
2868 &[],
2869 "isolated_pass_only_episode",
2870 accepted,
2871 ¤t_evaluation,
2872 &candidate_evaluation,
2873 ));
2874 iteration += 1;
2875 if accepted {
2876 protected_features.insert(candidate.feature_index);
2877 current_overrides = candidate_overrides;
2878 current_evaluation = candidate_evaluation;
2879 }
2880 }
2881
2882 current_overrides.sort_by(|left, right| {
2883 right
2884 .rescue_priority
2885 .cmp(&left.rescue_priority)
2886 .then_with(|| left.feature_name.cmp(&right.feature_name))
2887 });
2888 Ok((current_overrides, iteration_log))
2889}
2890
2891fn evaluate_selected_row_with_overrides(
2892 dataset: &PreparedDataset,
2893 nominal: &NominalModel,
2894 residuals: &ResidualSet,
2895 signs: &SignSet,
2896 baselines: &BaselineSet,
2897 grammar: &GrammarSet,
2898 baseline_selected_row: &CohortGridResult,
2899 baseline_evaluation: &DsaEvaluation,
2900 semantic_layer: &SemanticLayer,
2901 pre_failure_lookback_runs: usize,
2902 feature_policy_overrides: &[FeaturePolicyOverride],
2903) -> Result<DsaEvaluation> {
2904 let policy_runtime = DsaPolicyRuntime {
2905 feature_policy_overrides: feature_policy_overrides.to_vec(),
2906 recall_rescue: RecallRescueConfig {
2907 enabled: true,
2908 ..RecallRescueConfig::default()
2909 },
2910 semantic_rescue_support: build_semantic_rescue_support(
2911 semantic_layer,
2912 dataset.labels.len(),
2913 ),
2914 };
2915 let config = DsaConfig {
2916 window: baseline_selected_row.window,
2917 persistence_runs: baseline_selected_row.persistence_runs,
2918 alert_tau: baseline_selected_row.alert_tau,
2919 corroborating_feature_count_min: 1,
2920 };
2921 let base_evaluation = evaluate_dsa_with_policy(
2922 dataset,
2923 nominal,
2924 residuals,
2925 signs,
2926 baselines,
2927 grammar,
2928 &config,
2929 pre_failure_lookback_runs,
2930 &policy_runtime,
2931 )?;
2932 let feature_indices = baseline_evaluation
2933 .traces
2934 .iter()
2935 .map(|trace| trace.feature_index)
2936 .collect::<Vec<_>>();
2937 project_dsa_to_cohort(
2938 dataset,
2939 nominal,
2940 residuals,
2941 baselines,
2942 grammar,
2943 &base_evaluation,
2944 &feature_indices,
2945 baseline_selected_row.corroborating_m,
2946 pre_failure_lookback_runs,
2947 &baseline_selected_row.cohort_name,
2948 )
2949}
2950
2951fn newly_recovered_failures(previous: &DsaEvaluation, candidate: &DsaEvaluation) -> Vec<usize> {
2952 let previous_detected = previous
2953 .per_failure_run_signals
2954 .iter()
2955 .filter(|row| row.earliest_dsa_run.is_some())
2956 .map(|row| row.failure_run_index)
2957 .collect::<BTreeSet<_>>();
2958 candidate
2959 .per_failure_run_signals
2960 .iter()
2961 .filter(|row| row.earliest_dsa_run.is_some())
2962 .map(|row| row.failure_run_index)
2963 .filter(|failure_id| !previous_detected.contains(failure_id))
2964 .collect()
2965}
2966
2967fn single_change_iteration_row(
2968 iteration: usize,
2969 change_kind: &str,
2970 change_target: &str,
2971 reason: String,
2972 affected_failures: &[usize],
2973 targets_nuisance_class: &str,
2974 accepted: bool,
2975 previous: &DsaEvaluation,
2976 candidate: &DsaEvaluation,
2977) -> SingleChangeIterationRow {
2978 SingleChangeIterationRow {
2979 iteration,
2980 change_kind: change_kind.into(),
2981 change_target: change_target.into(),
2982 reason,
2983 derived_from_failures: affected_failures
2984 .iter()
2985 .map(|failure_id| failure_id.to_string())
2986 .collect::<Vec<_>>()
2987 .join(","),
2988 targets_nuisance_class: targets_nuisance_class.into(),
2989 affected_failures: affected_failures
2990 .iter()
2991 .map(|failure_id| failure_id.to_string())
2992 .collect::<Vec<_>>()
2993 .join(","),
2994 accepted,
2995 recall: candidate.summary.failure_run_recall,
2996 investigation_points: candidate.summary.alert_point_count,
2997 episode_count: candidate.episode_summary.dsa_episode_count,
2998 precursor_quality: candidate.episode_summary.precursor_quality,
2999 pass_run_nuisance_proxy: candidate.summary.pass_run_nuisance_proxy,
3000 delta_recall: candidate.summary.failure_run_recall as i64
3001 - previous.summary.failure_run_recall as i64,
3002 delta_investigation_points: candidate.summary.alert_point_count as i64
3003 - previous.summary.alert_point_count as i64,
3004 delta_episode_count: candidate.episode_summary.dsa_episode_count as i64
3005 - previous.episode_summary.dsa_episode_count as i64,
3006 delta_precursor_quality: match (
3007 previous.episode_summary.precursor_quality,
3008 candidate.episode_summary.precursor_quality,
3009 ) {
3010 (Some(previous_value), Some(candidate_value)) => Some(candidate_value - previous_value),
3011 _ => None,
3012 },
3013 delta_pass_run_nuisance_proxy: candidate.summary.pass_run_nuisance_proxy
3014 - previous.summary.pass_run_nuisance_proxy,
3015 }
3016}
3017
3018fn build_isolated_nuisance_overrides(
3019 dataset: &PreparedDataset,
3020 evaluation: &DsaEvaluation,
3021 protected_features: &BTreeSet<usize>,
3022) -> Vec<FeaturePolicyOverride> {
3023 let isolated_episode_counts = isolated_pass_episode_counts_by_feature(dataset, evaluation);
3024 let failure_alert_counts = evaluation
3025 .traces
3026 .iter()
3027 .map(|trace| {
3028 (
3029 trace.feature_index,
3030 trace
3031 .dsa_alert
3032 .iter()
3033 .enumerate()
3034 .filter(|(run_index, flag)| dataset.labels[*run_index] == 1 && **flag)
3035 .count(),
3036 )
3037 })
3038 .collect::<BTreeMap<_, _>>();
3039 let pass_review_burden = evaluation
3040 .traces
3041 .iter()
3042 .map(|trace| {
3043 (
3044 trace.feature_index,
3045 trace
3046 .dsa_alert
3047 .iter()
3048 .enumerate()
3049 .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
3050 .count(),
3051 )
3052 })
3053 .collect::<BTreeMap<_, _>>();
3054
3055 let mut overrides = evaluation
3056 .traces
3057 .iter()
3058 .filter(|trace| !protected_features.contains(&trace.feature_index))
3059 .filter_map(|trace| {
3060 let isolated_episode_count = isolated_episode_counts
3061 .get(&trace.feature_index)
3062 .copied()
3063 .unwrap_or(0);
3064 let failure_alert_count = failure_alert_counts
3065 .get(&trace.feature_index)
3066 .copied()
3067 .unwrap_or(0);
3068 let pass_review_count = pass_review_burden
3069 .get(&trace.feature_index)
3070 .copied()
3071 .unwrap_or(0);
3072 if isolated_episode_count == 0 || failure_alert_count > 0 || pass_review_count == 0 {
3073 return None;
3074 }
3075 Some((
3076 isolated_episode_count,
3077 pass_review_count,
3078 FeaturePolicyOverride {
3079 feature_index: trace.feature_index,
3080 feature_name: trace.feature_name.clone(),
3081 alert_class_override: Some(HeuristicAlertClass::Watch),
3082 requires_persistence_override: Some(true),
3083 requires_corroboration_override: Some(true),
3084 minimum_window_override: Some(3),
3085 minimum_hits_override: Some(2),
3086 maximum_allowed_fragmentation_override: Some(0.5),
3087 rescue_eligible: false,
3088 rescue_priority: 0,
3089 allow_watch_only: Some(true),
3090 allow_review_without_escalate: Some(false),
3091 suppress_if_isolated: Some(true),
3092 override_reason: format!(
3093 "Feature drives {} isolated pass-only episode(s) with {} pass-run alert points and no failure-local alert points; clamp to Watch and suppress if isolated.",
3094 isolated_episode_count, pass_review_count
3095 ),
3096 },
3097 ))
3098 })
3099 .collect::<Vec<_>>();
3100 overrides.sort_by(|left, right| {
3101 right
3102 .0
3103 .cmp(&left.0)
3104 .then_with(|| right.1.cmp(&left.1))
3105 .then_with(|| left.2.feature_name.cmp(&right.2.feature_name))
3106 });
3107 overrides
3108 .into_iter()
3109 .take(MAX_FAILURE_DRIVEN_ISOLATED_NUISANCE_OVERRIDES)
3110 .map(|(_, _, override_entry)| override_entry)
3111 .collect()
3112}
3113
3114fn isolated_pass_episode_counts_by_feature(
3115 dataset: &PreparedDataset,
3116 evaluation: &DsaEvaluation,
3117) -> BTreeMap<usize, usize> {
3118 let alerted_features_by_run = (0..dataset.labels.len())
3119 .map(|run_index| {
3120 evaluation
3121 .traces
3122 .iter()
3123 .filter(|trace| trace.dsa_alert[run_index])
3124 .map(|trace| trace.feature_index)
3125 .collect::<Vec<_>>()
3126 })
3127 .collect::<Vec<_>>();
3128 let episode_ranges = episode_ranges(&evaluation.run_signals.primary_run_alert);
3129 let mut counts = BTreeMap::<usize, usize>::new();
3130 for (start, end) in episode_ranges {
3131 if (start..=end).any(|run_index| dataset.labels[run_index] == 1) {
3132 continue;
3133 }
3134 let unique_features = (start..=end)
3135 .flat_map(|run_index| alerted_features_by_run[run_index].iter().copied())
3136 .collect::<BTreeSet<_>>();
3137 if unique_features.len() == 1 {
3138 let feature_index = *unique_features.iter().next().expect("one feature");
3139 *counts.entry(feature_index).or_default() += 1;
3140 }
3141 }
3142 counts
3143}
3144
3145fn build_feature_policy_summary(
3146 metrics: &BenchmarkMetrics,
3147 baseline_ranking: &[FeatureRankingRow],
3148 recall_aware_ranking: &[FeatureRankingRow],
3149 burden_aware_ranking: &[FeatureRankingRow],
3150 dsfb_aware_ranking: &[FeatureRankingRow],
3151 overrides: &[FeaturePolicyOverride],
3152) -> Vec<FeaturePolicySummaryRow> {
3153 let feature_metrics = metrics
3154 .feature_metrics
3155 .iter()
3156 .map(|feature| (feature.feature_index, feature))
3157 .collect::<BTreeMap<_, _>>();
3158 let baseline_by_feature = baseline_ranking
3159 .iter()
3160 .map(|row| (row.feature_index, row))
3161 .collect::<BTreeMap<_, _>>();
3162 let recall_by_feature = recall_aware_ranking
3163 .iter()
3164 .map(|row| (row.feature_index, row))
3165 .collect::<BTreeMap<_, _>>();
3166 let burden_by_feature = burden_aware_ranking
3167 .iter()
3168 .map(|row| (row.feature_index, row))
3169 .collect::<BTreeMap<_, _>>();
3170 let dsfb_by_feature = dsfb_aware_ranking
3171 .iter()
3172 .map(|row| (row.feature_index, row))
3173 .collect::<BTreeMap<_, _>>();
3174
3175 overrides
3176 .iter()
3177 .filter_map(|override_entry| {
3178 let feature_metric = feature_metrics.get(&override_entry.feature_index)?;
3179 Some(FeaturePolicySummaryRow {
3180 feature_index: override_entry.feature_index,
3181 feature_name: override_entry.feature_name.clone(),
3182 compression_rank: baseline_by_feature
3183 .get(&override_entry.feature_index)
3184 .map(|row| row.rank),
3185 recall_aware_rank: recall_by_feature
3186 .get(&override_entry.feature_index)
3187 .map(|row| row.rank),
3188 burden_aware_rank: burden_by_feature
3189 .get(&override_entry.feature_index)
3190 .map(|row| row.rank),
3191 dsfb_aware_rank: dsfb_by_feature
3192 .get(&override_entry.feature_index)
3193 .map(|row| row.rank),
3194 pre_failure_run_hits: feature_metric.pre_failure_run_hits,
3195 motif_precision_proxy: feature_metric.motif_precision_proxy,
3196 missing_fraction: feature_metric.missing_fraction,
3197 rescue_eligible: override_entry.rescue_eligible,
3198 rescue_priority: override_entry.rescue_priority,
3199 alert_class_override: override_entry.alert_class_override,
3200 requires_persistence_override: override_entry.requires_persistence_override,
3201 requires_corroboration_override: override_entry.requires_corroboration_override,
3202 minimum_window_override: override_entry.minimum_window_override,
3203 minimum_hits_override: override_entry.minimum_hits_override,
3204 maximum_allowed_fragmentation_override: override_entry
3205 .maximum_allowed_fragmentation_override,
3206 override_reason: override_entry.override_reason.clone(),
3207 allow_watch_only: override_entry.allow_watch_only,
3208 allow_review_without_escalate: override_entry.allow_review_without_escalate,
3209 suppress_if_isolated: override_entry.suppress_if_isolated,
3210 })
3211 })
3212 .collect()
3213}
3214
3215fn recall_rescue_contribution_by_feature(
3216 baseline_evaluation: &DsaEvaluation,
3217) -> BTreeMap<usize, f64> {
3218 let mut contributions = BTreeMap::<usize, f64>::new();
3219 for signal in baseline_evaluation
3220 .per_failure_run_signals
3221 .iter()
3222 .filter(|signal| signal.earliest_dsa_run.is_none())
3223 {
3224 let Some(feature_index) = signal.max_dsa_score_feature_index else {
3225 continue;
3226 };
3227 *contributions.entry(feature_index).or_default() += 1.0;
3228 }
3229 contributions
3230}
3231
3232fn build_recall_critical_features(
3233 baseline: &DsaEvaluation,
3234 optimized: &DsaEvaluation,
3235 baseline_ranking: &[FeatureRankingRow],
3236 recall_aware_ranking: &[FeatureRankingRow],
3237 feature_policy_overrides: &[FeaturePolicyOverride],
3238 recall_rescue_contributions: &BTreeMap<usize, f64>,
3239) -> Vec<RecallCriticalFeatureRow> {
3240 let optimized_by_failure = optimized
3241 .per_failure_run_signals
3242 .iter()
3243 .map(|row| (row.failure_run_index, row))
3244 .collect::<BTreeMap<_, _>>();
3245 let baseline_rank_by_feature = baseline_ranking
3246 .iter()
3247 .map(|row| (row.feature_index, row.rank))
3248 .collect::<BTreeMap<_, _>>();
3249 let recall_rank_by_feature = recall_aware_ranking
3250 .iter()
3251 .map(|row| (row.feature_index, row.rank))
3252 .collect::<BTreeMap<_, _>>();
3253 let overrides_by_feature = feature_policy_overrides
3254 .iter()
3255 .map(|override_entry| (override_entry.feature_index, override_entry))
3256 .collect::<BTreeMap<_, _>>();
3257
3258 baseline
3259 .per_failure_run_signals
3260 .iter()
3261 .filter(|row| row.earliest_dsa_run.is_none())
3262 .map(|row| {
3263 let feature_index = row.max_dsa_score_feature_index;
3264 let override_entry = feature_index
3265 .and_then(|feature_index| overrides_by_feature.get(&feature_index).copied());
3266 let optimized_row = optimized_by_failure.get(&row.failure_run_index).copied();
3267
3268 RecallCriticalFeatureRow {
3269 failure_run_index: row.failure_run_index,
3270 feature_index,
3271 feature_name: row.max_dsa_score_feature_name.clone(),
3272 compression_rank: feature_index.and_then(|feature_index| {
3273 baseline_rank_by_feature.get(&feature_index).copied()
3274 }),
3275 recall_aware_rank: feature_index
3276 .and_then(|feature_index| recall_rank_by_feature.get(&feature_index).copied()),
3277 max_structural_score: row.max_dsa_score_in_lookback,
3278 resolved_alert_class: row.max_dsa_score_resolved_alert_class.clone(),
3279 policy_state: row.max_dsa_score_policy_state.clone(),
3280 boundary_density_w: row.max_dsa_score_boundary_density_w,
3281 ewma_occupancy_w: row.max_dsa_score_ewma_occupancy_w,
3282 motif_recurrence_w: row.max_dsa_score_motif_recurrence_w,
3283 fragmentation_proxy_w: row.max_dsa_score_fragmentation_proxy_w,
3284 consistent: row.max_dsa_score_consistent,
3285 exact_miss_rule: if row
3286 .max_dsa_score_consistent
3287 .is_some_and(|consistent| !consistent)
3288 && row
3289 .max_dsa_score_resolved_alert_class
3290 .as_deref()
3291 .is_some_and(|class| class == "Watch" || class == "Review")
3292 {
3293 "directional_consistency_gate".into()
3294 } else if row.max_dsa_score_numeric_dsa_alert == Some(false)
3295 && row.max_dsa_score_in_lookback.is_some()
3296 {
3297 "watch_class_near_miss_below_numeric_gate".into()
3298 } else if row.max_dsa_score_in_lookback.unwrap_or(0.0) < 2.0 {
3299 "numeric_score_below_tau".into()
3300 } else {
3301 "policy_state_never_reached_review".into()
3302 },
3303 feature_override_exists: override_entry.is_some(),
3304 rescue_priority: override_entry
3305 .map(|override_entry| override_entry.rescue_priority),
3306 allow_review_without_escalate: override_entry
3307 .and_then(|override_entry| override_entry.allow_review_without_escalate),
3308 bounded_feature_override_would_recover: optimized_row
3309 .is_some_and(|optimized_row| optimized_row.earliest_dsa_run.is_some()),
3310 recovered_after_optimization: optimized_row
3311 .is_some_and(|optimized_row| optimized_row.earliest_dsa_run.is_some()),
3312 optimized_feature_name: optimized_row
3313 .and_then(|optimized_row| optimized_row.earliest_dsa_feature_name.clone()),
3314 recall_rescue_contribution: feature_index
3315 .and_then(|feature_index| {
3316 recall_rescue_contributions.get(&feature_index).copied()
3317 })
3318 .unwrap_or(0.0),
3319 }
3320 })
3321 .collect()
3322}
3323
3324fn pareto_frontier(rows: &[CohortGridResult]) -> Vec<CohortGridResult> {
3325 let recall_floor = 100usize;
3326 let candidate_pool = rows
3327 .iter()
3328 .filter(|row| row.failure_recall >= recall_floor)
3329 .collect::<Vec<_>>();
3330 let candidate_pool = if candidate_pool.is_empty() {
3331 rows.iter().collect::<Vec<_>>()
3332 } else {
3333 candidate_pool
3334 };
3335
3336 let mut frontier = candidate_pool
3337 .iter()
3338 .filter(|row| {
3339 !candidate_pool.iter().any(|other| {
3340 other.grid_row_id != row.grid_row_id
3341 && delta_nuisance_relative(row.ewma_nuisance, other.pass_run_nuisance_proxy)
3342 >= delta_nuisance_relative(row.ewma_nuisance, row.pass_run_nuisance_proxy)
3343 && other.failure_recall >= row.failure_recall
3344 && (delta_nuisance_relative(row.ewma_nuisance, other.pass_run_nuisance_proxy)
3345 > delta_nuisance_relative(row.ewma_nuisance, row.pass_run_nuisance_proxy)
3346 || other.failure_recall > row.failure_recall)
3347 })
3348 })
3349 .map(|row| (*row).clone())
3350 .collect::<Vec<_>>();
3351 frontier.sort_by(|left, right| compare_stage_b_rows(left, right, left.ewma_nuisance));
3352 frontier
3353}
3354
3355fn stage_a_candidates(
3356 rows: &[CohortGridResult],
3357 raw_boundary_nuisance: f64,
3358 current_policy_dsa_nuisance: f64,
3359) -> Vec<CohortGridResult> {
3360 let mut candidates = rows
3361 .iter()
3362 .filter(|row| {
3363 row.pass_run_nuisance_proxy < raw_boundary_nuisance && row.failure_recall >= 100
3364 })
3365 .cloned()
3366 .collect::<Vec<_>>();
3367 candidates
3368 .sort_by(|left, right| compare_stage_a_rows(left, right, current_policy_dsa_nuisance));
3369 candidates
3370}
3371
3372fn stage_b_candidates(
3373 rows: &[CohortGridResult],
3374 ewma_nuisance: f64,
3375 current_policy_dsa_nuisance: f64,
3376) -> Vec<CohortGridResult> {
3377 let mut candidates = rows.to_vec();
3378 candidates.sort_by(|left, right| {
3379 (left.pass_run_nuisance_proxy < ewma_nuisance)
3380 .cmp(&(right.pass_run_nuisance_proxy < ewma_nuisance))
3381 .reverse()
3382 .then_with(|| compare_stage_b_rows(left, right, current_policy_dsa_nuisance))
3383 });
3384 candidates
3385}
3386
3387fn stage1_candidates(
3388 rows: &[CohortGridResult],
3389 operator_baselines: &OperatorBaselines,
3390) -> Vec<CohortGridResult> {
3391 let mut candidates = rows
3392 .iter()
3393 .filter(|row| row.failure_recall >= 100)
3394 .cloned()
3395 .collect::<Vec<_>>();
3396 candidates.sort_by(|left, right| {
3397 compare_operator_rows(left, right, operator_baselines).then_with(|| {
3398 compare_stage_a_rows(
3399 left,
3400 right,
3401 operator_baselines
3402 .current_policy_dsa
3403 .pass_run_nuisance_proxy,
3404 )
3405 })
3406 });
3407 candidates
3408}
3409
3410fn stage2_candidates(
3411 rows: &[CohortGridResult],
3412 operator_baselines: &OperatorBaselines,
3413) -> Vec<CohortGridResult> {
3414 let mut candidates = rows.to_vec();
3415 candidates.sort_by(|left, right| compare_operator_rows(left, right, operator_baselines));
3416 candidates
3417}
3418
3419fn compare_operator_rows(
3420 left: &CohortGridResult,
3421 right: &CohortGridResult,
3422 baselines: &OperatorBaselines,
3423) -> Ordering {
3424 let left_delta_investigation = delta_relative_count(
3425 baselines.baseline_investigation_points,
3426 left.investigation_point_count,
3427 );
3428 let right_delta_investigation = delta_relative_count(
3429 baselines.baseline_investigation_points,
3430 right.investigation_point_count,
3431 );
3432 let left_delta_episode =
3433 delta_relative_count(baselines.baseline_episode_count, left.dsa_episode_count);
3434 let right_delta_episode =
3435 delta_relative_count(baselines.baseline_episode_count, right.dsa_episode_count);
3436 let left_delta_review_points = delta_relative_f64(
3437 baselines.baseline_review_escalate_points_per_pass_run,
3438 left.review_escalate_points_per_pass_run,
3439 );
3440 let right_delta_review_points = delta_relative_f64(
3441 baselines.baseline_review_escalate_points_per_pass_run,
3442 right.review_escalate_points_per_pass_run,
3443 );
3444 let left_delta_review_episodes = delta_relative_f64(
3445 baselines.baseline_review_escalate_episodes_per_pass_run,
3446 left.review_escalate_episodes_per_pass_run,
3447 );
3448 let right_delta_review_episodes = delta_relative_f64(
3449 baselines.baseline_review_escalate_episodes_per_pass_run,
3450 right.review_escalate_episodes_per_pass_run,
3451 );
3452
3453 right
3454 .failure_recall
3455 .cmp(&left.failure_recall)
3456 .then_with(|| {
3457 right_delta_investigation
3458 .partial_cmp(&left_delta_investigation)
3459 .unwrap_or(Ordering::Equal)
3460 })
3461 .then_with(|| {
3462 right_delta_episode
3463 .partial_cmp(&left_delta_episode)
3464 .unwrap_or(Ordering::Equal)
3465 })
3466 .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
3467 .then_with(|| {
3468 right_delta_review_points
3469 .partial_cmp(&left_delta_review_points)
3470 .unwrap_or(Ordering::Equal)
3471 })
3472 .then_with(|| {
3473 right_delta_review_episodes
3474 .partial_cmp(&left_delta_review_episodes)
3475 .unwrap_or(Ordering::Equal)
3476 })
3477 .then_with(|| {
3478 delta_nuisance_relative(right.ewma_nuisance, right.pass_run_nuisance_proxy)
3479 .partial_cmp(&delta_nuisance_relative(
3480 left.ewma_nuisance,
3481 left.pass_run_nuisance_proxy,
3482 ))
3483 .unwrap_or(Ordering::Equal)
3484 })
3485 .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
3486 .then_with(|| left.cohort_name.cmp(&right.cohort_name))
3487}
3488
3489fn choose_optimized_row(
3490 stage_b_candidates: &[CohortGridResult],
3491 all_rows: &[CohortGridResult],
3492 ewma_nuisance: f64,
3493 threshold_recall: usize,
3494 current_policy_dsa_nuisance: f64,
3495) -> Option<CohortGridResult> {
3496 stage_b_candidates.first().cloned().or_else(|| {
3497 all_rows.iter().cloned().min_by(|left, right| {
3498 let left_primary_gap = primary_success_gap(left);
3499 let right_primary_gap = primary_success_gap(right);
3500 left_primary_gap
3501 .partial_cmp(&right_primary_gap)
3502 .unwrap_or(Ordering::Equal)
3503 .then_with(|| {
3504 (left.pass_run_nuisance_proxy < ewma_nuisance)
3505 .cmp(&(right.pass_run_nuisance_proxy < ewma_nuisance))
3506 .reverse()
3507 })
3508 .then_with(|| {
3509 let left_recall_gap = threshold_recall.saturating_sub(left.failure_recall);
3510 let right_recall_gap = threshold_recall.saturating_sub(right.failure_recall);
3511 left_recall_gap.cmp(&right_recall_gap)
3512 })
3513 .then_with(|| compare_stage_b_rows(left, right, current_policy_dsa_nuisance))
3514 })
3515 })
3516}
3517
3518fn compare_stage_a_rows(
3519 left: &CohortGridResult,
3520 right: &CohortGridResult,
3521 current_policy_dsa_nuisance: f64,
3522) -> Ordering {
3523 delta_nuisance_relative(right.ewma_nuisance, right.pass_run_nuisance_proxy)
3524 .partial_cmp(&delta_nuisance_relative(
3525 left.ewma_nuisance,
3526 left.pass_run_nuisance_proxy,
3527 ))
3528 .unwrap_or(Ordering::Equal)
3529 .then_with(|| {
3530 delta_nuisance_relative(current_policy_dsa_nuisance, right.pass_run_nuisance_proxy)
3531 .partial_cmp(&delta_nuisance_relative(
3532 current_policy_dsa_nuisance,
3533 left.pass_run_nuisance_proxy,
3534 ))
3535 .unwrap_or(Ordering::Equal)
3536 })
3537 .then_with(|| right.failure_recall.cmp(&left.failure_recall))
3538 .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
3539 .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
3540 .then_with(|| compare_option_f64(right.compression_ratio, left.compression_ratio))
3541}
3542
3543fn compare_stage_b_rows(
3544 left: &CohortGridResult,
3545 right: &CohortGridResult,
3546 current_policy_dsa_nuisance: f64,
3547) -> Ordering {
3548 right
3549 .failure_recall
3550 .cmp(&left.failure_recall)
3551 .then_with(|| {
3552 delta_nuisance_relative(right.ewma_nuisance, right.pass_run_nuisance_proxy)
3553 .partial_cmp(&delta_nuisance_relative(
3554 left.ewma_nuisance,
3555 left.pass_run_nuisance_proxy,
3556 ))
3557 .unwrap_or(Ordering::Equal)
3558 })
3559 .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
3560 .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
3561 .then_with(|| compare_option_f64(right.compression_ratio, left.compression_ratio))
3562 .then_with(|| {
3563 delta_nuisance_relative(current_policy_dsa_nuisance, right.pass_run_nuisance_proxy)
3564 .partial_cmp(&delta_nuisance_relative(
3565 current_policy_dsa_nuisance,
3566 left.pass_run_nuisance_proxy,
3567 ))
3568 .unwrap_or(Ordering::Equal)
3569 })
3570}
3571
3572fn rebuild_selected_evaluation_with_policy(
3573 dataset: &PreparedDataset,
3574 nominal: &NominalModel,
3575 residuals: &ResidualSet,
3576 signs: &SignSet,
3577 baselines: &BaselineSet,
3578 grammar: &GrammarSet,
3579 baseline_cohorts: &FeatureCohorts,
3580 recall_aware_cohorts: &FeatureCohorts,
3581 burden_aware_cohorts: &FeatureCohorts,
3582 dsfb_aware_cohorts: &FeatureCohorts,
3583 pre_failure_lookback_runs: usize,
3584 row: &CohortGridResult,
3585 policy_runtime: &DsaPolicyRuntime,
3586) -> Result<DsaEvaluation> {
3587 let cohorts = match row.ranking_strategy.as_str() {
3588 "recall_aware" => recall_aware_cohorts,
3589 "burden_aware" => burden_aware_cohorts,
3590 "dsfb_aware" => dsfb_aware_cohorts,
3591 _ => baseline_cohorts,
3592 };
3593 let base_config = DsaConfig {
3594 window: row.window,
3595 persistence_runs: row.persistence_runs,
3596 alert_tau: row.alert_tau,
3597 corroborating_feature_count_min: 1,
3598 };
3599 let base_evaluation = evaluate_dsa_with_policy(
3600 dataset,
3601 nominal,
3602 residuals,
3603 signs,
3604 baselines,
3605 grammar,
3606 &base_config,
3607 pre_failure_lookback_runs,
3608 policy_runtime,
3609 )?;
3610 let feature_indices = cohort_members(cohorts, &row.cohort_name)
3611 .iter()
3612 .map(|member| member.feature_index)
3613 .collect::<Vec<_>>();
3614 project_dsa_to_cohort(
3615 dataset,
3616 nominal,
3617 residuals,
3618 baselines,
3619 grammar,
3620 &base_evaluation,
3621 &feature_indices,
3622 row.corroborating_m,
3623 pre_failure_lookback_runs,
3624 &row.cohort_name,
3625 )
3626}
3627
3628fn build_missed_failure_diagnostics(
3629 baseline: &DsaEvaluation,
3630 optimized: &DsaEvaluation,
3631 feature_policy_overrides: &[FeaturePolicyOverride],
3632) -> Vec<MissedFailureDiagnosticRow> {
3633 let optimized_by_failure = optimized
3634 .per_failure_run_signals
3635 .iter()
3636 .map(|row| (row.failure_run_index, row))
3637 .collect::<BTreeMap<_, _>>();
3638 let overrides_by_feature = feature_policy_overrides
3639 .iter()
3640 .map(|override_entry| (override_entry.feature_name.as_str(), override_entry))
3641 .collect::<BTreeMap<_, _>>();
3642
3643 baseline
3644 .per_failure_run_signals
3645 .iter()
3646 .filter(|row| row.earliest_dsa_run.is_none())
3647 .map(|row| {
3648 let optimized_row = optimized_by_failure.get(&row.failure_run_index).copied();
3649 let resolved_watch = row
3650 .max_dsa_score_resolved_alert_class
3651 .as_deref()
3652 .is_some_and(|value| value == "Watch" || value == "Review");
3653 let override_entry = row
3654 .max_dsa_score_feature_name
3655 .as_deref()
3656 .and_then(|feature_name| overrides_by_feature.get(feature_name))
3657 .copied();
3658 let fragmentation_ceiling = override_entry.is_some_and(|override_entry| {
3659 row.max_dsa_score_fragmentation_proxy_w.unwrap_or(0.0)
3660 > override_entry
3661 .maximum_allowed_fragmentation_override
3662 .unwrap_or(OPTIMIZATION_RESCUE_FRAGMENTATION)
3663 });
3664 let directional_consistency_gate =
3665 row.max_dsa_score_consistent == Some(false) && resolved_watch;
3666 let policy_suppression = row.max_dsa_score_policy_suppressed.unwrap_or(false)
3667 || (row
3668 .max_dsa_score_policy_state
3669 .as_deref()
3670 .is_some_and(|state| state == "silent")
3671 && resolved_watch);
3672 let persistence_gate = row
3673 .max_dsa_score_policy_state
3674 .as_deref()
3675 .is_some_and(|state| state == "silent")
3676 && row.max_dsa_score_numeric_dsa_alert == Some(false)
3677 && row.max_dsa_score_in_lookback.is_some();
3678 let rescue_eligible = override_entry.is_some();
3679 let recovered_after_optimization =
3680 optimized_row.is_some_and(|optimized_row| optimized_row.earliest_dsa_run.is_some());
3681
3682 MissedFailureDiagnosticRow {
3683 failure_run_index: row.failure_run_index,
3684 nearest_feature_name: row.max_dsa_score_feature_name.clone(),
3685 nearest_feature_score: row.max_dsa_score_in_lookback,
3686 nearest_feature_policy_state: row.max_dsa_score_policy_state.clone(),
3687 nearest_feature_resolved_alert_class: row
3688 .max_dsa_score_resolved_alert_class
3689 .clone(),
3690 nearest_feature_boundary_density_w: row.max_dsa_score_boundary_density_w,
3691 nearest_feature_ewma_occupancy_w: row.max_dsa_score_ewma_occupancy_w,
3692 nearest_feature_motif_recurrence_w: row.max_dsa_score_motif_recurrence_w,
3693 nearest_feature_fragmentation_proxy_w: row.max_dsa_score_fragmentation_proxy_w,
3694 nearest_feature_consistent: row.max_dsa_score_consistent,
3695 ranking_exclusion: false,
3696 cohort_selection: false,
3697 policy_suppression,
3698 fragmentation_ceiling,
3699 directional_consistency_gate,
3700 persistence_gate,
3701 corroboration_threshold: false,
3702 rescue_gate_not_activating: rescue_eligible && !recovered_after_optimization,
3703 exact_miss_rule: if fragmentation_ceiling {
3704 "feature_override_fragmentation_ceiling".into()
3705 } else if directional_consistency_gate {
3706 "directional_consistency_gate".into()
3707 } else if persistence_gate {
3708 "watch_class_near_miss_below_numeric_gate".into()
3709 } else if row.max_dsa_score_in_lookback.unwrap_or(0.0) < 2.0 {
3710 "numeric_score_below_tau".into()
3711 } else {
3712 "policy_state_never_reached_review".into()
3713 },
3714 bounded_rescue_would_recover: recovered_after_optimization,
3715 recovered_after_optimization,
3716 optimized_feature_name: optimized_row
3717 .and_then(|row| row.earliest_dsa_feature_name.clone()),
3718 }
3719 })
3720 .collect()
3721}
3722
3723fn build_policy_contribution_analysis(
3724 baseline: &DsaEvaluation,
3725 optimized: &DsaEvaluation,
3726 selected_row: &CohortGridResult,
3727) -> Vec<PolicyContributionAnalysisRow> {
3728 let baseline_missed = baseline
3729 .per_failure_run_signals
3730 .iter()
3731 .filter(|row| row.earliest_dsa_run.is_none())
3732 .map(|row| row.failure_run_index)
3733 .collect::<Vec<_>>();
3734 let optimized_by_failure = optimized
3735 .per_failure_run_signals
3736 .iter()
3737 .map(|row| (row.failure_run_index, row))
3738 .collect::<BTreeMap<_, _>>();
3739 let mut rows = Vec::new();
3740
3741 for contribution in &optimized.motif_policy_contributions {
3742 rows.push(PolicyContributionAnalysisRow {
3743 configuration_role: if selected_row.primary_success {
3744 "best_success".into()
3745 } else {
3746 "best_near_success".into()
3747 },
3748 contribution_type: "motif_nuisance_suppression".into(),
3749 name: contribution.motif_name.clone(),
3750 value: contribution.silent_suppression_points as f64,
3751 note: "silent_suppression_points".into(),
3752 });
3753 rows.push(PolicyContributionAnalysisRow {
3754 configuration_role: if selected_row.primary_success {
3755 "best_success".into()
3756 } else {
3757 "best_near_success".into()
3758 },
3759 contribution_type: "motif_pre_failure_review_or_escalate".into(),
3760 name: contribution.motif_name.clone(),
3761 value: contribution.pre_failure_review_or_escalate_points as f64,
3762 note: "pre_failure_review_or_escalate_points".into(),
3763 });
3764 }
3765
3766 let mut rescued_feature_counts = BTreeMap::<String, usize>::new();
3767 for failure_run_index in baseline_missed {
3768 if let Some(optimized_row) = optimized_by_failure.get(&failure_run_index) {
3769 if let Some(feature_name) = &optimized_row.earliest_dsa_feature_name {
3770 *rescued_feature_counts
3771 .entry(feature_name.clone())
3772 .or_default() += 1;
3773 }
3774 }
3775 }
3776 for (feature_name, count) in rescued_feature_counts {
3777 rows.push(PolicyContributionAnalysisRow {
3778 configuration_role: if selected_row.primary_success {
3779 "best_success".into()
3780 } else {
3781 "best_near_success".into()
3782 },
3783 contribution_type: "rescued_failure_feature".into(),
3784 name: feature_name,
3785 value: count as f64,
3786 note: "recovered baseline-missed failures".into(),
3787 });
3788 }
3789
3790 let mut rescue_transition_counts = BTreeMap::<String, usize>::new();
3791 for trace in &optimized.traces {
3792 for transition in &trace.rescue_transition {
3793 if transition != "none" {
3794 *rescue_transition_counts
3795 .entry(transition.clone())
3796 .or_default() += 1;
3797 }
3798 }
3799 }
3800 for (transition, count) in rescue_transition_counts {
3801 rows.push(PolicyContributionAnalysisRow {
3802 configuration_role: if selected_row.primary_success {
3803 "best_success".into()
3804 } else {
3805 "best_near_success".into()
3806 },
3807 contribution_type: "rescue_transition".into(),
3808 name: transition,
3809 value: count as f64,
3810 note: "rescued feature points".into(),
3811 });
3812 }
3813
3814 rows
3815}
3816
3817fn compute_delta_target_assessment(
3818 selected_row: &CohortGridResult,
3819 stage_a_candidates: &[CohortGridResult],
3820 all_rows: &[CohortGridResult],
3821 current_policy_baseline_row: &CohortGridResult,
3822 metrics: &BenchmarkMetrics,
3823) -> DeltaTargetAssessment {
3824 let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
3825 let current_policy_dsa_nuisance = current_policy_baseline_row.pass_run_nuisance_proxy;
3826 let primary_target_nuisance_ceiling = ewma_nuisance * (1.0 - PRIMARY_DELTA_TARGET);
3827 let secondary_target_nuisance_ceiling =
3828 current_policy_dsa_nuisance * (1.0 - SECONDARY_DELTA_TARGET);
3829
3830 let selected_configuration =
3831 delta_candidate_summary(selected_row, ewma_nuisance, current_policy_dsa_nuisance);
3832 let best_recall_103_candidate = all_rows
3833 .iter()
3834 .filter(|row| row.failure_recall >= 103)
3835 .cloned()
3836 .collect::<Vec<_>>()
3837 .into_iter()
3838 .min_by(|left, right| compare_stage_a_rows(left, right, current_policy_dsa_nuisance))
3839 .map(|row| delta_candidate_summary(&row, ewma_nuisance, current_policy_dsa_nuisance));
3840 let best_recall_104_candidate = all_rows
3841 .iter()
3842 .filter(|row| row.failure_recall >= 104)
3843 .cloned()
3844 .collect::<Vec<_>>()
3845 .into_iter()
3846 .min_by(|left, right| compare_stage_a_rows(left, right, current_policy_dsa_nuisance))
3847 .map(|row| delta_candidate_summary(&row, ewma_nuisance, current_policy_dsa_nuisance));
3848 let best_secondary_target_candidate = all_rows
3849 .iter()
3850 .filter(|row| row.failure_recall >= 100)
3851 .cloned()
3852 .max_by(|left, right| {
3853 delta_nuisance_relative(current_policy_dsa_nuisance, left.pass_run_nuisance_proxy)
3854 .partial_cmp(&delta_nuisance_relative(
3855 current_policy_dsa_nuisance,
3856 right.pass_run_nuisance_proxy,
3857 ))
3858 .unwrap_or(Ordering::Equal)
3859 })
3860 .map(|row| delta_candidate_summary(&row, ewma_nuisance, current_policy_dsa_nuisance));
3861 let best_stage_a_delta_candidate = stage_a_candidates
3862 .first()
3863 .map(|row| delta_candidate_summary(row, ewma_nuisance, current_policy_dsa_nuisance));
3864 let best_reachable_pareto_point = best_recall_103_candidate
3865 .clone()
3866 .or_else(|| best_stage_a_delta_candidate.clone())
3867 .unwrap_or_else(|| selected_configuration.clone());
3868
3869 let primary_target_met = selected_configuration.delta_nuisance_vs_ewma >= PRIMARY_DELTA_TARGET
3870 && selected_configuration.failure_recall >= 103;
3871 let ideal_target_met = selected_configuration.delta_nuisance_vs_ewma >= PRIMARY_DELTA_TARGET
3872 && selected_configuration.failure_recall >= 104;
3873 let secondary_target_met = selected_configuration.delta_nuisance_vs_current_dsa
3874 >= SECONDARY_DELTA_TARGET
3875 && selected_configuration.failure_recall >= 100;
3876 let mean_lead_time_ge_ewma = paired_ge(
3877 selected_row.mean_lead_time_runs,
3878 metrics.lead_time_summary.mean_ewma_lead_runs,
3879 );
3880 let mean_lead_time_ge_threshold = paired_ge(
3881 selected_row.mean_lead_time_runs,
3882 metrics.lead_time_summary.mean_threshold_lead_runs,
3883 );
3884
3885 let assessment_note = if primary_target_met {
3886 format!(
3887 "Primary 40% nuisance-reduction target reached on {} with delta_nuisance_vs_ewma {:.4} and recall {}/{}.",
3888 selected_configuration.configuration,
3889 selected_configuration.delta_nuisance_vs_ewma,
3890 selected_configuration.failure_recall,
3891 selected_configuration.failure_runs,
3892 )
3893 } else if let Some(best_recall_103_candidate) = &best_recall_103_candidate {
3894 format!(
3895 "Primary 40% nuisance-reduction target was not reachable in the saved deterministic sweep. The best row retaining recall >= 103/104 was {} with nuisance {:.4}, delta_nuisance_vs_ewma {:.4}, and delta_nuisance_vs_current_dsa {:.4}. Reaching the primary target would require nuisance <= {:.4}; no recall >= 103 row achieved that ceiling.",
3896 best_recall_103_candidate.configuration,
3897 best_recall_103_candidate.pass_run_nuisance_proxy,
3898 best_recall_103_candidate.delta_nuisance_vs_ewma,
3899 best_recall_103_candidate.delta_nuisance_vs_current_dsa,
3900 primary_target_nuisance_ceiling,
3901 )
3902 } else if let Some(best_secondary_target_candidate) = &best_secondary_target_candidate {
3903 format!(
3904 "No recall-preserving row reached the primary 40% delta target. The best row with recall >= 100/104 was {} with delta_nuisance_vs_ewma {:.4} and delta_nuisance_vs_current_dsa {:.4}; the secondary 40% target would require nuisance <= {:.4}.",
3905 best_secondary_target_candidate.configuration,
3906 best_secondary_target_candidate.delta_nuisance_vs_ewma,
3907 best_secondary_target_candidate.delta_nuisance_vs_current_dsa,
3908 secondary_target_nuisance_ceiling,
3909 )
3910 } else {
3911 format!(
3912 "No saved row satisfied even the Stage A recall floor, so the 40% target is unachievable under the current deterministic search."
3913 )
3914 };
3915
3916 DeltaTargetAssessment {
3917 primary_target_definition: predeclared_primary_target(),
3918 secondary_target_definition: predeclared_secondary_target(),
3919 ewma_nuisance_baseline: ewma_nuisance,
3920 current_policy_dsa_nuisance_baseline: current_policy_dsa_nuisance,
3921 primary_delta_target: PRIMARY_DELTA_TARGET,
3922 secondary_delta_target: SECONDARY_DELTA_TARGET,
3923 primary_target_nuisance_ceiling,
3924 secondary_target_nuisance_ceiling,
3925 selected_configuration,
3926 primary_target_met,
3927 ideal_target_met,
3928 secondary_target_met,
3929 mean_lead_time_ge_ewma,
3930 mean_lead_time_ge_threshold,
3931 best_recall_103_candidate,
3932 best_recall_104_candidate,
3933 best_secondary_target_candidate,
3934 best_stage_a_delta_candidate,
3935 best_reachable_pareto_point,
3936 assessment_note,
3937 }
3938}
3939
3940fn build_operator_baselines(
3941 dataset: &PreparedDataset,
3942 grammar: &GrammarSet,
3943 baseline_evaluation: &DsaEvaluation,
3944) -> OperatorBaselines {
3945 let numeric_only_dsa = OperatorBaselineLayer {
3946 name: "numeric_only_dsa".into(),
3947 investigation_points: baseline_evaluation.summary.numeric_alert_point_count,
3948 episode_count: episode_ranges(&baseline_evaluation.run_signals.numeric_primary_run_alert)
3949 .len(),
3950 review_escalate_points_per_pass_run: numeric_alert_points_per_pass_run(
3951 dataset,
3952 baseline_evaluation,
3953 ),
3954 review_escalate_episodes_per_pass_run: numeric_alert_episodes_per_pass_run(
3955 dataset,
3956 baseline_evaluation,
3957 ),
3958 precursor_quality: baseline_evaluation.episode_summary.precursor_quality,
3959 recall: baseline_evaluation
3960 .summary
3961 .numeric_primary_failure_run_recall,
3962 pass_run_nuisance_proxy: baseline_evaluation
3963 .summary
3964 .numeric_primary_pass_run_nuisance_proxy,
3965 };
3966 let current_policy_dsa = OperatorBaselineLayer {
3967 name: "current_policy_dsa".into(),
3968 investigation_points: baseline_evaluation.summary.alert_point_count,
3969 episode_count: baseline_evaluation.episode_summary.dsa_episode_count,
3970 review_escalate_points_per_pass_run: review_escalate_points_per_pass_run(
3971 dataset,
3972 baseline_evaluation,
3973 ),
3974 review_escalate_episodes_per_pass_run: review_escalate_episodes_per_pass_run(
3975 dataset,
3976 baseline_evaluation,
3977 ),
3978 precursor_quality: baseline_evaluation.episode_summary.precursor_quality,
3979 recall: baseline_evaluation.summary.failure_run_recall,
3980 pass_run_nuisance_proxy: baseline_evaluation.summary.pass_run_nuisance_proxy,
3981 };
3982 let raw_boundary = OperatorBaselineLayer {
3983 name: "raw_boundary".into(),
3984 investigation_points: baseline_evaluation.summary.raw_boundary_episode_count,
3985 episode_count: baseline_evaluation.summary.raw_boundary_episode_count,
3986 review_escalate_points_per_pass_run: raw_boundary_points_per_pass_run(dataset, grammar),
3987 review_escalate_episodes_per_pass_run: raw_boundary_episodes_per_pass_run(dataset, grammar),
3988 precursor_quality: None,
3989 recall: 0,
3990 pass_run_nuisance_proxy: baseline_evaluation.summary.raw_boundary_nuisance_proxy,
3991 };
3992
3993 OperatorBaselines {
3994 investigation_baseline_layer: numeric_only_dsa.name.clone(),
3995 episode_baseline_layer: raw_boundary.name.clone(),
3996 review_burden_baseline_layer: current_policy_dsa.name.clone(),
3997 baseline_investigation_points: numeric_only_dsa.investigation_points,
3998 baseline_episode_count: raw_boundary.episode_count,
3999 baseline_review_escalate_points_per_pass_run: current_policy_dsa
4000 .review_escalate_points_per_pass_run,
4001 baseline_review_escalate_episodes_per_pass_run: current_policy_dsa
4002 .review_escalate_episodes_per_pass_run,
4003 baseline_precursor_quality: current_policy_dsa.precursor_quality,
4004 baseline_recall: current_policy_dsa.recall,
4005 numeric_only_dsa,
4006 current_policy_dsa,
4007 raw_boundary,
4008 }
4009}
4010
4011fn compute_operator_delta_targets(
4012 selected_row: &CohortGridResult,
4013 selected_evaluation: &DsaEvaluation,
4014 baselines: &OperatorBaselines,
4015 metrics: &BenchmarkMetrics,
4016) -> OperatorDeltaTargets {
4017 let baseline_precursor_quality = baselines.baseline_precursor_quality;
4018 let optimized_precursor_quality = selected_row.precursor_quality;
4019 let precursor_quality_status =
4020 match (baseline_precursor_quality, optimized_precursor_quality) {
4021 (Some(baseline), Some(optimized)) if optimized > baseline + 1.0e-9 => "improved",
4022 (Some(baseline), Some(optimized)) if (optimized - baseline).abs() <= 1.0e-9 => {
4023 "preserved"
4024 }
4025 (Some(_), Some(_)) => "degraded",
4026 _ => "unavailable",
4027 }
4028 .to_string();
4029
4030 OperatorDeltaTargets {
4031 primary_success_definition:
4032 "Failure coverage must match threshold or remain within one missed run while investigation-worthy burden is reduced materially relative to a structural baseline."
4033 .into(),
4034 recall_tolerance_runs: RECALL_TOLERANCE,
4035 selected_configuration: delta_candidate_summary(
4036 selected_row,
4037 metrics.summary.pass_run_ewma_nuisance_rate,
4038 baselines.current_policy_dsa.pass_run_nuisance_proxy,
4039 ),
4040 baseline_investigation_points: baselines.baseline_investigation_points,
4041 baseline_episode_count: baselines.baseline_episode_count,
4042 baseline_review_points_per_pass_run: baselines
4043 .baseline_review_escalate_points_per_pass_run,
4044 baseline_review_episodes_per_pass_run: baselines
4045 .baseline_review_escalate_episodes_per_pass_run,
4046 optimized_review_escalate_points: selected_row.investigation_point_count,
4047 optimized_episode_count: selected_row.dsa_episode_count,
4048 optimized_review_points_per_pass_run: selected_row.review_escalate_points_per_pass_run,
4049 optimized_review_episodes_per_pass_run: selected_row.review_escalate_episodes_per_pass_run,
4050 delta_investigation_load: delta_relative_count(
4051 baselines.baseline_investigation_points,
4052 selected_row.investigation_point_count,
4053 ),
4054 delta_episode_count: delta_relative_count(
4055 baselines.baseline_episode_count,
4056 selected_row.dsa_episode_count,
4057 ),
4058 delta_review_points_per_pass_run: delta_relative_f64(
4059 baselines.baseline_review_escalate_points_per_pass_run,
4060 selected_row.review_escalate_points_per_pass_run,
4061 ),
4062 delta_review_episodes_per_pass_run: delta_relative_f64(
4063 baselines.baseline_review_escalate_episodes_per_pass_run,
4064 selected_row.review_escalate_episodes_per_pass_run,
4065 ),
4066 precursor_quality_status,
4067 recall_equals_threshold: selected_row.failure_recall
4068 == metrics.summary.failure_runs_with_preceding_threshold_signal,
4069 recall_within_tolerance: selected_row.failure_recall + RECALL_TOLERANCE
4070 >= metrics.summary.failure_runs_with_preceding_threshold_signal,
4071 recall_ge_103: selected_row.failure_recall >= 103,
4072 recall_eq_104: selected_row.failure_recall >= 104,
4073 delta_nuisance_vs_ewma: delta_nuisance_relative(
4074 metrics.summary.pass_run_ewma_nuisance_rate,
4075 selected_row.pass_run_nuisance_proxy,
4076 ),
4077 delta_nuisance_vs_threshold: delta_nuisance_relative(
4078 metrics.summary.pass_run_threshold_nuisance_rate,
4079 selected_row.pass_run_nuisance_proxy,
4080 ),
4081 mean_lead_delta_vs_ewma: selected_row.mean_lead_delta_vs_ewma_runs,
4082 mean_lead_delta_vs_threshold: selected_row.mean_lead_delta_vs_threshold_runs,
4083 median_lead_delta_vs_ewma: selected_row
4084 .median_lead_time_runs
4085 .zip(metrics.lead_time_summary.mean_ewma_lead_runs)
4086 .map(|(selected, ewma)| selected - ewma),
4087 median_lead_delta_vs_threshold: selected_row
4088 .median_lead_time_runs
4089 .zip(metrics.lead_time_summary.mean_threshold_lead_runs)
4090 .map(|(selected, threshold)| selected - threshold),
4091 stable_precursor_lead_time_delta: stable_precursor_lead_time_delta(selected_evaluation),
4092 }
4093}
4094
4095fn build_operator_delta_attainment_matrix(
4096 selected_row: &CohortGridResult,
4097 stage1_candidates: &[CohortGridResult],
4098 stage2_candidates: &[CohortGridResult],
4099 baselines: &OperatorBaselines,
4100 metrics: &BenchmarkMetrics,
4101) -> Vec<OperatorDeltaAttainmentRow> {
4102 let mut rows = Vec::new();
4103 let mut push_row = |configuration_role: &str, row: &CohortGridResult| {
4104 let delta_investigation_load = delta_relative_count(
4105 baselines.baseline_investigation_points,
4106 row.investigation_point_count,
4107 );
4108 let delta_episode_count =
4109 delta_relative_count(baselines.baseline_episode_count, row.dsa_episode_count);
4110 let delta_review_points_per_pass_run = delta_relative_f64(
4111 baselines.baseline_review_escalate_points_per_pass_run,
4112 row.review_escalate_points_per_pass_run,
4113 );
4114 let delta_review_episodes_per_pass_run = delta_relative_f64(
4115 baselines.baseline_review_escalate_episodes_per_pass_run,
4116 row.review_escalate_episodes_per_pass_run,
4117 );
4118 let precursor_quality_status =
4119 match (baselines.baseline_precursor_quality, row.precursor_quality) {
4120 (Some(baseline), Some(optimized)) if optimized > baseline + 1.0e-9 => "improved",
4121 (Some(baseline), Some(optimized)) if (optimized - baseline).abs() <= 1.0e-9 => {
4122 "preserved"
4123 }
4124 (Some(_), Some(_)) => "degraded",
4125 _ => "unavailable",
4126 }
4127 .to_string();
4128 let delta_nuisance_vs_ewma = delta_nuisance_relative(
4129 metrics.summary.pass_run_ewma_nuisance_rate,
4130 row.pass_run_nuisance_proxy,
4131 );
4132 rows.push(OperatorDeltaAttainmentRow {
4133 configuration_role: configuration_role.into(),
4134 configuration: row_label(row),
4135 delta_investigation_load,
4136 delta_episode_count,
4137 delta_review_points_per_pass_run,
4138 delta_review_episodes_per_pass_run,
4139 precursor_quality_status: precursor_quality_status.clone(),
4140 recall: row.failure_recall,
4141 mean_lead_time_runs: row.mean_lead_time_runs,
4142 delta_nuisance_vs_ewma,
4143 target_a_investigation_load_ge_040: delta_investigation_load
4144 >= OPERATOR_DELTA_THRESHOLD,
4145 target_b_episode_count_ge_040: delta_episode_count >= OPERATOR_DELTA_THRESHOLD,
4146 target_c_review_points_per_pass_run_ge_040: delta_review_points_per_pass_run
4147 >= OPERATOR_DELTA_THRESHOLD,
4148 target_d_review_episodes_per_pass_run_ge_040: delta_review_episodes_per_pass_run
4149 >= OPERATOR_DELTA_THRESHOLD,
4150 target_e_precursor_quality_preserved_or_improved: precursor_quality_status
4151 != "degraded",
4152 target_f_recall_ge_103: row.failure_recall >= 103,
4153 target_g_recall_eq_104: row.failure_recall >= 104,
4154 target_h_nuisance_ge_015: delta_nuisance_vs_ewma >= 0.15,
4155 target_h_nuisance_ge_025: delta_nuisance_vs_ewma >= 0.25,
4156 target_h_nuisance_ge_040: delta_nuisance_vs_ewma >= 0.40,
4157 target_i_stable_precursor_lead_improved: None,
4158 });
4159 };
4160 push_row("selected", selected_row);
4161 if let Some(row) = stage1_candidates.first() {
4162 push_row("stage1_best", row);
4163 }
4164 if let Some(row) = stage2_candidates.first() {
4165 push_row("stage2_best", row);
4166 }
4167 rows
4168}
4169
4170fn build_policy_operator_burden_contributions(
4171 dataset: &PreparedDataset,
4172 baseline: &DsaEvaluation,
4173 optimized: &DsaEvaluation,
4174 selected_row: &CohortGridResult,
4175) -> Vec<OperatorBurdenContributionRow> {
4176 let mut rows = Vec::new();
4177 for (role, evaluation) in [("baseline", baseline), ("optimized", optimized)] {
4178 for contribution in &evaluation.motif_policy_contributions {
4179 rows.push(OperatorBurdenContributionRow {
4180 configuration_role: role.into(),
4181 contribution_scope: "motif".into(),
4182 name: contribution.motif_name.clone(),
4183 contribution_type: "review_escalate_burden".into(),
4184 value: contribution.pass_review_or_escalate_points as f64,
4185 note: "pass-run Review/Escalate feature points".into(),
4186 });
4187 rows.push(OperatorBurdenContributionRow {
4188 configuration_role: role.into(),
4189 contribution_scope: "motif".into(),
4190 name: contribution.motif_name.clone(),
4191 contribution_type: "pre_failure_review_escalate".into(),
4192 value: contribution.pre_failure_review_or_escalate_points as f64,
4193 note: "pre-failure Review/Escalate feature points".into(),
4194 });
4195 rows.push(OperatorBurdenContributionRow {
4196 configuration_role: role.into(),
4197 contribution_scope: "motif".into(),
4198 name: contribution.motif_name.clone(),
4199 contribution_type: "silent_suppression".into(),
4200 value: contribution.silent_suppression_points as f64,
4201 note: "explicit silent suppression points".into(),
4202 });
4203 }
4204 for trace in &evaluation.traces {
4205 let burden = trace
4206 .dsa_alert
4207 .iter()
4208 .enumerate()
4209 .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
4210 .count() as f64;
4211 if burden > 0.0 {
4212 rows.push(OperatorBurdenContributionRow {
4213 configuration_role: role.into(),
4214 contribution_scope: "feature".into(),
4215 name: trace.feature_name.clone(),
4216 contribution_type: "pass_run_review_escalate_burden".into(),
4217 value: burden,
4218 note: format!("selected row {}", row_label(selected_row)),
4219 });
4220 }
4221 }
4222 }
4223 rows
4224}
4225
4226fn build_recall_recovery_efficiency(
4227 dataset: &PreparedDataset,
4228 baseline: &DsaEvaluation,
4229 optimized: &DsaEvaluation,
4230 pre_failure_lookback_runs: usize,
4231) -> Vec<RecallRecoveryEfficiencyRow> {
4232 let recovered_failures =
4233 optimized.summary.failure_run_recall as i64 - baseline.summary.failure_run_recall as i64;
4234 let added_review_escalate_points =
4235 optimized.summary.alert_point_count as i64 - baseline.summary.alert_point_count as i64;
4236 let added_episode_count = optimized.episode_summary.dsa_episode_count as i64
4237 - baseline.episode_summary.dsa_episode_count as i64;
4238 let added_review_points_per_pass_run = review_escalate_points_per_pass_run(dataset, optimized)
4239 - review_escalate_points_per_pass_run(dataset, baseline);
4240 let added_review_episodes_per_pass_run =
4241 review_escalate_episodes_per_pass_run(dataset, optimized)
4242 - review_escalate_episodes_per_pass_run(dataset, baseline);
4243 let baseline_pass_nuisance_runs = (baseline.summary.pass_run_nuisance_proxy
4244 * dataset.labels.iter().filter(|label| **label == -1).count() as f64)
4245 .round() as i64;
4246 let optimized_pass_nuisance_runs = (optimized.summary.pass_run_nuisance_proxy
4247 * dataset.labels.iter().filter(|label| **label == -1).count() as f64)
4248 .round() as i64;
4249 let added_nuisance_runs = optimized_pass_nuisance_runs - baseline_pass_nuisance_runs;
4250
4251 let mut rows = vec![RecallRecoveryEfficiencyRow {
4252 failure_run_index: None,
4253 baseline_configuration: "current_policy_dsa".into(),
4254 optimized_configuration: "optimized_policy_dsa".into(),
4255 recovered_failures,
4256 added_review_escalate_points,
4257 added_episode_count,
4258 added_review_points_per_pass_run,
4259 added_review_episodes_per_pass_run,
4260 added_nuisance_runs,
4261 recovered_failures_per_added_review_escalate_point: ratio_if_positive(
4262 recovered_failures,
4263 added_review_escalate_points,
4264 ),
4265 recovered_failures_per_added_episode: ratio_if_positive(
4266 recovered_failures,
4267 added_episode_count,
4268 ),
4269 recovered_failures_per_added_pass_run_burden: if added_review_points_per_pass_run > 0.0 {
4270 Some(recovered_failures as f64 / added_review_points_per_pass_run)
4271 } else {
4272 None
4273 },
4274 recovered_failures_per_added_nuisance_run: ratio_if_positive(
4275 recovered_failures,
4276 added_nuisance_runs,
4277 ),
4278 }];
4279
4280 let optimized_by_failure = optimized
4281 .per_failure_run_signals
4282 .iter()
4283 .map(|row| (row.failure_run_index, row))
4284 .collect::<BTreeMap<_, _>>();
4285 for failure_signal in baseline
4286 .per_failure_run_signals
4287 .iter()
4288 .filter(|row| row.earliest_dsa_run.is_none())
4289 {
4290 let failure_run_index = failure_signal.failure_run_index;
4291 let start = failure_run_index.saturating_sub(pre_failure_lookback_runs);
4292 let end = failure_run_index;
4293 let baseline_review_points = review_escalate_points_in_window(baseline, start, end) as i64;
4294 let optimized_review_points =
4295 review_escalate_points_in_window(optimized, start, end) as i64;
4296 let baseline_pass_review_points =
4297 review_escalate_points_in_pass_window(dataset, baseline, start, end);
4298 let optimized_pass_review_points =
4299 review_escalate_points_in_pass_window(dataset, optimized, start, end);
4300 let baseline_episode_count =
4301 primary_episode_count_in_window(&baseline.run_signals.primary_run_alert, start, end)
4302 as i64;
4303 let optimized_episode_count =
4304 primary_episode_count_in_window(&optimized.run_signals.primary_run_alert, start, end)
4305 as i64;
4306 let pass_runs_in_window = dataset.labels[start..end]
4307 .iter()
4308 .filter(|label| **label == -1)
4309 .count();
4310 let added_review_points_per_pass_run = if pass_runs_in_window > 0 {
4311 (optimized_pass_review_points as f64 - baseline_pass_review_points as f64)
4312 / pass_runs_in_window as f64
4313 } else {
4314 0.0
4315 };
4316 let baseline_review_episodes_per_pass_run =
4317 review_escalate_episodes_per_pass_run_in_window(dataset, baseline, start, end);
4318 let optimized_review_episodes_per_pass_run =
4319 review_escalate_episodes_per_pass_run_in_window(dataset, optimized, start, end);
4320 let added_review_episodes_per_pass_run =
4321 optimized_review_episodes_per_pass_run - baseline_review_episodes_per_pass_run;
4322 let baseline_nuisance_runs =
4323 primary_nuisance_runs_in_window(dataset, baseline, start, end) as i64;
4324 let optimized_nuisance_runs =
4325 primary_nuisance_runs_in_window(dataset, optimized, start, end) as i64;
4326 let recovered = optimized_by_failure
4327 .get(&failure_run_index)
4328 .is_some_and(|row| row.earliest_dsa_run.is_some());
4329 rows.push(RecallRecoveryEfficiencyRow {
4330 failure_run_index: Some(failure_run_index),
4331 baseline_configuration: "current_policy_dsa".into(),
4332 optimized_configuration: "optimized_policy_dsa".into(),
4333 recovered_failures: i64::from(recovered),
4334 added_review_escalate_points: optimized_review_points - baseline_review_points,
4335 added_episode_count: optimized_episode_count - baseline_episode_count,
4336 added_review_points_per_pass_run,
4337 added_review_episodes_per_pass_run,
4338 added_nuisance_runs: optimized_nuisance_runs - baseline_nuisance_runs,
4339 recovered_failures_per_added_review_escalate_point: ratio_if_positive(
4340 i64::from(recovered),
4341 optimized_review_points - baseline_review_points,
4342 ),
4343 recovered_failures_per_added_episode: ratio_if_positive(
4344 i64::from(recovered),
4345 optimized_episode_count - baseline_episode_count,
4346 ),
4347 recovered_failures_per_added_pass_run_burden: if added_review_points_per_pass_run > 0.0
4348 {
4349 Some(1.0 / added_review_points_per_pass_run)
4350 } else {
4351 None
4352 },
4353 recovered_failures_per_added_nuisance_run: ratio_if_positive(
4354 i64::from(recovered),
4355 optimized_nuisance_runs - baseline_nuisance_runs,
4356 ),
4357 });
4358 }
4359
4360 rows
4361}
4362
/// Returns `numerator / denominator` as `f64` when `denominator` is strictly
/// positive, and `None` for a zero or negative denominator.
fn ratio_if_positive(numerator: i64, denominator: i64) -> Option<f64> {
    if denominator <= 0 {
        return None;
    }
    Some(numerator as f64 / denominator as f64)
}
4366
4367fn stable_precursor_lead_time_delta(selected_evaluation: &DsaEvaluation) -> Option<f64> {
4368 let stable_leads = selected_evaluation
4369 .per_failure_run_signals
4370 .iter()
4371 .filter(|signal| signal.max_dsa_score_motif_recurrence_w.unwrap_or(0.0) >= 0.5)
4372 .filter_map(|signal| signal.dsa_lead_runs.map(|lead| lead as f64))
4373 .collect::<Vec<_>>();
4374 if stable_leads.is_empty() {
4375 None
4376 } else {
4377 Some(stable_leads.iter().sum::<f64>() / stable_leads.len() as f64)
4378 }
4379}
4380
4381fn review_escalate_points_in_window(evaluation: &DsaEvaluation, start: usize, end: usize) -> usize {
4382 evaluation
4383 .traces
4384 .iter()
4385 .map(|trace| {
4386 trace.dsa_alert[start..end]
4387 .iter()
4388 .filter(|flag| **flag)
4389 .count()
4390 })
4391 .sum()
4392}
4393
4394fn review_escalate_points_in_pass_window(
4395 dataset: &PreparedDataset,
4396 evaluation: &DsaEvaluation,
4397 start: usize,
4398 end: usize,
4399) -> usize {
4400 evaluation
4401 .traces
4402 .iter()
4403 .map(|trace| {
4404 trace.dsa_alert[start..end]
4405 .iter()
4406 .enumerate()
4407 .filter(|(offset, flag)| dataset.labels[start + *offset] == -1 && **flag)
4408 .count()
4409 })
4410 .sum()
4411}
4412
4413fn review_escalate_episodes_per_pass_run_in_window(
4414 dataset: &PreparedDataset,
4415 evaluation: &DsaEvaluation,
4416 start: usize,
4417 end: usize,
4418) -> f64 {
4419 let pass_runs = dataset.labels[start..end]
4420 .iter()
4421 .filter(|label| **label == -1)
4422 .count();
4423 if pass_runs == 0 {
4424 return 0.0;
4425 }
4426 primary_episode_count_in_window(&evaluation.run_signals.primary_run_alert, start, end) as f64
4427 / pass_runs as f64
4428}
4429
4430fn primary_episode_count_in_window(signal: &[bool], start: usize, end: usize) -> usize {
4431 episode_ranges(signal)
4432 .into_iter()
4433 .filter(|(episode_start, episode_end)| *episode_start < end && *episode_end >= start)
4434 .count()
4435}
4436
4437fn primary_nuisance_runs_in_window(
4438 dataset: &PreparedDataset,
4439 evaluation: &DsaEvaluation,
4440 start: usize,
4441 end: usize,
4442) -> usize {
4443 evaluation.run_signals.primary_run_alert[start..end]
4444 .iter()
4445 .enumerate()
4446 .filter(|(offset, flag)| dataset.labels[start + *offset] == -1 && **flag)
4447 .count()
4448}
4449
/// Splits `signal` into its maximal runs of consecutive `true` values.
///
/// Each run is returned as `(first_index, last_index)` with both bounds
/// inclusive, in ascending order. An empty or all-`false` signal yields an
/// empty vector.
fn episode_ranges(signal: &[bool]) -> Vec<(usize, usize)> {
    let mut ranges = Vec::new();
    let mut cursor = 0;
    while cursor < signal.len() {
        if !signal[cursor] {
            cursor += 1;
            continue;
        }
        // `cursor` sits on the first active index; advance past the whole run.
        let begin = cursor;
        while cursor < signal.len() && signal[cursor] {
            cursor += 1;
        }
        ranges.push((begin, cursor - 1));
    }
    ranges
}
4468
/// Relative reduction from `baseline` to `optimized`, i.e.
/// `(baseline - optimized) / baseline`, computed in `f64`.
///
/// Positive values mean `optimized` is smaller than `baseline`. Returns `0.0`
/// when `baseline` is zero.
fn delta_relative_count(baseline: usize, optimized: usize) -> f64 {
    match baseline {
        0 => 0.0,
        _ => {
            let reference = baseline as f64;
            (reference - optimized as f64) / reference
        }
    }
}
4476
/// Relative reduction from `baseline` to `optimized`, i.e.
/// `(baseline - optimized) / baseline`.
///
/// Returns `0.0` when `|baseline|` is at most `f64::EPSILON`, which avoids
/// dividing by a (near-)zero baseline.
fn delta_relative_f64(baseline: f64, optimized: f64) -> f64 {
    let baseline_is_negligible = baseline.abs() <= f64::EPSILON;
    if baseline_is_negligible {
        return 0.0;
    }
    let reduction = baseline - optimized;
    reduction / baseline
}
4484
4485fn review_escalate_points_per_pass_run(
4486 dataset: &PreparedDataset,
4487 evaluation: &DsaEvaluation,
4488) -> f64 {
4489 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
4490 if pass_run_count == 0 {
4491 return 0.0;
4492 }
4493 let points = evaluation
4494 .traces
4495 .iter()
4496 .map(|trace| {
4497 trace
4498 .dsa_alert
4499 .iter()
4500 .enumerate()
4501 .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
4502 .count()
4503 })
4504 .sum::<usize>();
4505 points as f64 / pass_run_count as f64
4506}
4507
4508fn numeric_alert_points_per_pass_run(dataset: &PreparedDataset, evaluation: &DsaEvaluation) -> f64 {
4509 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
4510 if pass_run_count == 0 {
4511 return 0.0;
4512 }
4513 let points = evaluation
4514 .traces
4515 .iter()
4516 .map(|trace| {
4517 trace
4518 .numeric_dsa_alert
4519 .iter()
4520 .enumerate()
4521 .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
4522 .count()
4523 })
4524 .sum::<usize>();
4525 points as f64 / pass_run_count as f64
4526}
4527
4528fn review_escalate_episodes_per_pass_run(
4529 dataset: &PreparedDataset,
4530 evaluation: &DsaEvaluation,
4531) -> f64 {
4532 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
4533 if pass_run_count == 0 {
4534 return 0.0;
4535 }
4536 let mask = evaluation
4537 .run_signals
4538 .primary_run_alert
4539 .iter()
4540 .enumerate()
4541 .map(|(run_index, flag)| dataset.labels[run_index] == -1 && *flag)
4542 .collect::<Vec<_>>();
4543 episode_ranges(&mask).len() as f64 / pass_run_count as f64
4544}
4545
4546fn numeric_alert_episodes_per_pass_run(
4547 dataset: &PreparedDataset,
4548 evaluation: &DsaEvaluation,
4549) -> f64 {
4550 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
4551 if pass_run_count == 0 {
4552 return 0.0;
4553 }
4554 let mask = evaluation
4555 .run_signals
4556 .numeric_primary_run_alert
4557 .iter()
4558 .enumerate()
4559 .map(|(run_index, flag)| dataset.labels[run_index] == -1 && *flag)
4560 .collect::<Vec<_>>();
4561 episode_ranges(&mask).len() as f64 / pass_run_count as f64
4562}
4563
4564fn raw_boundary_points_per_pass_run(dataset: &PreparedDataset, grammar: &GrammarSet) -> f64 {
4565 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
4566 if pass_run_count == 0 {
4567 return 0.0;
4568 }
4569 let points = grammar
4570 .traces
4571 .iter()
4572 .map(|trace| {
4573 trace
4574 .raw_states
4575 .iter()
4576 .enumerate()
4577 .filter(|(run_index, state)| {
4578 dataset.labels[*run_index] == -1
4579 && **state == crate::grammar::GrammarState::Boundary
4580 })
4581 .count()
4582 })
4583 .sum::<usize>();
4584 points as f64 / pass_run_count as f64
4585}
4586
4587fn raw_boundary_episodes_per_pass_run(dataset: &PreparedDataset, grammar: &GrammarSet) -> f64 {
4588 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
4589 if pass_run_count == 0 {
4590 return 0.0;
4591 }
4592 let episode_count = grammar
4593 .traces
4594 .iter()
4595 .map(|trace| {
4596 let mask = trace
4597 .raw_states
4598 .iter()
4599 .enumerate()
4600 .map(|(run_index, state)| {
4601 dataset.labels[run_index] == -1
4602 && *state == crate::grammar::GrammarState::Boundary
4603 })
4604 .collect::<Vec<_>>();
4605 episode_ranges(&mask).len()
4606 })
4607 .sum::<usize>();
4608 episode_count as f64 / pass_run_count as f64
4609}
4610
4611fn operator_burden_contribution_by_feature(
4612 dataset: &PreparedDataset,
4613 evaluation: &DsaEvaluation,
4614) -> BTreeMap<usize, f64> {
4615 evaluation
4616 .traces
4617 .iter()
4618 .map(|trace| {
4619 let burden = trace
4620 .dsa_alert
4621 .iter()
4622 .enumerate()
4623 .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
4624 .count() as f64;
4625 (trace.feature_index, burden)
4626 })
4627 .collect()
4628}
4629
4630fn delta_candidate_summary(
4631 row: &CohortGridResult,
4632 ewma_nuisance: f64,
4633 current_policy_dsa_nuisance: f64,
4634) -> DeltaCandidateSummary {
4635 DeltaCandidateSummary {
4636 configuration: row_label(row),
4637 ranking_strategy: row.ranking_strategy.clone(),
4638 cohort_name: row.cohort_name.clone(),
4639 window: row.window,
4640 persistence_runs: row.persistence_runs,
4641 alert_tau: row.alert_tau,
4642 corroborating_m: row.corroborating_m,
4643 failure_recall: row.failure_recall,
4644 failure_runs: row.failure_runs,
4645 pass_run_nuisance_proxy: row.pass_run_nuisance_proxy,
4646 delta_nuisance_vs_ewma: delta_nuisance_relative(ewma_nuisance, row.pass_run_nuisance_proxy),
4647 delta_nuisance_vs_current_dsa: delta_nuisance_relative(
4648 current_policy_dsa_nuisance,
4649 row.pass_run_nuisance_proxy,
4650 ),
4651 mean_lead_time_runs: row.mean_lead_time_runs,
4652 precursor_quality: row.precursor_quality,
4653 compression_ratio: row.compression_ratio,
4654 }
4655}
4656
/// Relative nuisance reduction of `dsa_nuisance` with respect to `baseline_nuisance`.
///
/// Computes `(baseline - dsa) / baseline`, so positive values mean the DSA nuisance is
/// lower than the baseline. When the baseline magnitude is at most `f64::EPSILON` the
/// ratio is undefined and `0.0` is returned instead.
fn delta_nuisance_relative(baseline_nuisance: f64, dsa_nuisance: f64) -> f64 {
    let baseline_is_negligible = baseline_nuisance.abs() <= f64::EPSILON;
    if baseline_is_negligible {
        return 0.0;
    }
    let absolute_reduction = baseline_nuisance - dsa_nuisance;
    absolute_reduction / baseline_nuisance
}
4664
4665pub fn write_cohort_results_csv(path: &Path, results: &[CohortGridResult]) -> Result<()> {
4666 let mut writer = Writer::from_path(path)?;
4667 writer.write_record([
4668 "ranking_strategy",
4669 "ranking_formula",
4670 "grid_row_id",
4671 "feature_trace_config_id",
4672 "cohort_name",
4673 "cohort_size",
4674 "window",
4675 "persistence_runs",
4676 "alert_tau",
4677 "corroborating_m",
4678 "primary_run_signal",
4679 "failure_recall",
4680 "failure_runs",
4681 "failure_recall_rate",
4682 "threshold_recall",
4683 "ewma_recall",
4684 "failure_recall_delta_vs_threshold",
4685 "failure_recall_delta_vs_ewma",
4686 "mean_lead_time_runs",
4687 "median_lead_time_runs",
4688 "threshold_mean_lead_time_runs",
4689 "ewma_mean_lead_time_runs",
4690 "mean_lead_delta_vs_threshold_runs",
4691 "mean_lead_delta_vs_ewma_runs",
4692 "pass_run_nuisance_proxy",
4693 "numeric_pass_run_nuisance_proxy",
4694 "ewma_nuisance",
4695 "threshold_nuisance",
4696 "pass_run_nuisance_delta_vs_ewma",
4697 "pass_run_nuisance_delta_vs_threshold",
4698 "pass_run_nuisance_delta_vs_numeric_dsa",
4699 "raw_boundary_episode_count",
4700 "dsa_episode_count",
4701 "dsa_episodes_preceding_failure",
4702 "mean_dsa_episode_length_runs",
4703 "max_dsa_episode_length_runs",
4704 "compression_ratio",
4705 "precursor_quality",
4706 "non_escalating_dsa_episode_fraction",
4707 "feature_level_active_points",
4708 "feature_level_alert_points",
4709 "persistence_suppression_fraction",
4710 "numeric_failure_recall",
4711 "policy_vs_numeric_recall_delta",
4712 "watch_point_count",
4713 "review_point_count",
4714 "escalate_point_count",
4715 "silenced_point_count",
4716 "rescued_point_count",
4717 "rescued_watch_to_review_points",
4718 "rescued_review_to_escalate_points",
4719 "primary_success",
4720 "primary_success_reason",
4721 ])?;
4722 for row in results {
4723 writer.write_record([
4724 row.ranking_strategy.clone(),
4725 row.ranking_formula.clone(),
4726 row.grid_row_id.to_string(),
4727 row.feature_trace_config_id.to_string(),
4728 row.cohort_name.clone(),
4729 row.cohort_size.to_string(),
4730 row.window.to_string(),
4731 row.persistence_runs.to_string(),
4732 format!("{:.6}", row.alert_tau),
4733 row.corroborating_m.to_string(),
4734 row.primary_run_signal.clone(),
4735 row.failure_recall.to_string(),
4736 row.failure_runs.to_string(),
4737 format!("{:.6}", row.failure_recall_rate),
4738 row.threshold_recall.to_string(),
4739 row.ewma_recall.to_string(),
4740 row.failure_recall_delta_vs_threshold.to_string(),
4741 row.failure_recall_delta_vs_ewma.to_string(),
4742 format_option_csv(row.mean_lead_time_runs),
4743 format_option_csv(row.median_lead_time_runs),
4744 format_option_csv(row.threshold_mean_lead_time_runs),
4745 format_option_csv(row.ewma_mean_lead_time_runs),
4746 format_option_csv(row.mean_lead_delta_vs_threshold_runs),
4747 format_option_csv(row.mean_lead_delta_vs_ewma_runs),
4748 format!("{:.6}", row.pass_run_nuisance_proxy),
4749 format!("{:.6}", row.numeric_pass_run_nuisance_proxy),
4750 format!("{:.6}", row.ewma_nuisance),
4751 format!("{:.6}", row.threshold_nuisance),
4752 format!("{:.6}", row.pass_run_nuisance_delta_vs_ewma),
4753 format!("{:.6}", row.pass_run_nuisance_delta_vs_threshold),
4754 format!("{:.6}", row.pass_run_nuisance_delta_vs_numeric_dsa),
4755 row.raw_boundary_episode_count.to_string(),
4756 row.dsa_episode_count.to_string(),
4757 row.dsa_episodes_preceding_failure.to_string(),
4758 format_option_csv(row.mean_dsa_episode_length_runs),
4759 row.max_dsa_episode_length_runs.to_string(),
4760 format_option_csv(row.compression_ratio),
4761 format_option_csv(row.precursor_quality),
4762 format_option_csv(row.non_escalating_dsa_episode_fraction),
4763 row.feature_level_active_points.to_string(),
4764 row.feature_level_alert_points.to_string(),
4765 format_option_csv(row.persistence_suppression_fraction),
4766 row.numeric_failure_recall.to_string(),
4767 row.policy_vs_numeric_recall_delta.to_string(),
4768 row.watch_point_count.to_string(),
4769 row.review_point_count.to_string(),
4770 row.escalate_point_count.to_string(),
4771 row.silenced_point_count.to_string(),
4772 row.rescued_point_count.to_string(),
4773 row.rescued_watch_to_review_points.to_string(),
4774 row.rescued_review_to_escalate_points.to_string(),
4775 row.primary_success.to_string(),
4776 row.primary_success_reason.clone(),
4777 ])?;
4778 }
4779 writer.flush()?;
4780 Ok(())
4781}
4782
4783pub fn write_motif_policy_contributions_csv(
4784 path: &Path,
4785 rows: &[CohortMotifPolicyContributionRow],
4786) -> Result<()> {
4787 let mut writer = Writer::from_path(path)?;
4788 writer.write_record([
4789 "grid_row_id",
4790 "cohort_name",
4791 "cohort_size",
4792 "window",
4793 "persistence_runs",
4794 "alert_tau",
4795 "corroborating_m",
4796 "motif_name",
4797 "alert_class_default",
4798 "watch_points",
4799 "review_points",
4800 "escalate_points",
4801 "silent_suppression_points",
4802 "pass_review_or_escalate_points",
4803 "pre_failure_review_or_escalate_points",
4804 ])?;
4805 for row in rows {
4806 writer.write_record([
4807 row.grid_row_id.to_string(),
4808 row.cohort_name.clone(),
4809 row.cohort_size.to_string(),
4810 row.window.to_string(),
4811 row.persistence_runs.to_string(),
4812 format!("{:.6}", row.alert_tau),
4813 row.corroborating_m.to_string(),
4814 row.motif_name.clone(),
4815 format!("{:?}", row.alert_class_default),
4816 row.watch_points.to_string(),
4817 row.review_points.to_string(),
4818 row.escalate_points.to_string(),
4819 row.silent_suppression_points.to_string(),
4820 row.pass_review_or_escalate_points.to_string(),
4821 row.pre_failure_review_or_escalate_points.to_string(),
4822 ])?;
4823 }
4824 writer.flush()?;
4825 Ok(())
4826}
4827
4828pub fn write_precursor_quality_csv(path: &Path, results: &[CohortGridResult]) -> Result<()> {
4829 let mut writer = Writer::from_path(path)?;
4830 writer.write_record([
4831 "cohort_name",
4832 "window",
4833 "persistence_runs",
4834 "alert_tau",
4835 "corroborating_m",
4836 "raw_boundary_episode_count",
4837 "dsa_episode_count",
4838 "dsa_episodes_preceding_failure",
4839 "precursor_quality",
4840 "compression_ratio",
4841 ])?;
4842 for row in results {
4843 writer.write_record([
4844 row.cohort_name.clone(),
4845 row.window.to_string(),
4846 row.persistence_runs.to_string(),
4847 format!("{:.6}", row.alert_tau),
4848 row.corroborating_m.to_string(),
4849 row.raw_boundary_episode_count.to_string(),
4850 row.dsa_episode_count.to_string(),
4851 row.dsa_episodes_preceding_failure.to_string(),
4852 format_option_csv(row.precursor_quality),
4853 format_option_csv(row.compression_ratio),
4854 ])?;
4855 }
4856 writer.flush()?;
4857 Ok(())
4858}
4859
4860pub fn write_failure_analysis_md(path: &Path, analysis: &CohortFailureAnalysis) -> Result<()> {
4861 let content = format!(
4862 "# DSA Cohort Failure Analysis\n\n\
4863 ## Closest near-success configuration\n\n\
4864 - Cohort: {}\n\
4865 - Grid point: {}\n\
4866 - Policy setting: {}\n\
4867 - Nuisance: {:.6}\n\
4868 - Recall: {}\n\
4869 - EWMA nuisance target: {:.6}\n\
4870 - Threshold recall target: {}\n\n\
4871 ## Limiting factor\n\n\
4872 {}\n\n\
4873 ## Cross-feature corroboration effect\n\n\
4874 {}\n\n\
4875 ## Policy vs numeric-only DSA\n\n\
4876 {}\n\n\
4877 ## Ranking quality\n\n\
4878 {}\n\n\
4879 ## All-feature DSA vs cohort DSA\n\n\
4880 {}\n\n\
4881 ## Motif classes most responsible for nuisance\n\n\
4882 {}\n\n\
4883 ## Motif classes most responsible for useful precursor episodes\n\n\
4884 {}\n\n\
4885 ## Best near-success source\n\n\
4886 {}\n",
4887 analysis.closest_cohort,
4888 analysis.closest_grid_point,
4889 analysis.closest_policy_setting,
4890 analysis.closest_nuisance,
4891 analysis.closest_recall,
4892 analysis.ewma_nuisance,
4893 analysis.threshold_recall,
4894 analysis.limiting_factor,
4895 analysis.corroboration_effect,
4896 analysis.policy_vs_numeric_note,
4897 analysis.ranking_quality_note,
4898 analysis.all_feature_dsa_vs_cohort_note,
4899 analysis.nuisance_motif_classes,
4900 analysis.useful_precursor_motif_classes,
4901 analysis.best_near_success_source,
4902 );
4903 std::fs::write(path, content)?;
4904 Ok(())
4905}
4906
4907pub fn write_heuristic_policy_failure_analysis_md(
4908 path: &Path,
4909 analysis: &CohortFailureAnalysis,
4910) -> Result<()> {
4911 write_failure_analysis_md(path, analysis)
4912}
4913
4914pub fn compute_rating_delta_forecast(
4915 dsa: &DsaEvaluation,
4916 metrics: &BenchmarkMetrics,
4917 cohort_summary: Option<&CohortDsaSummary>,
4918) -> RatingDeltaForecast {
4919 let chosen = cohort_summary
4920 .and_then(|summary| summary.selected_configuration.as_ref())
4921 .cloned()
4922 .unwrap_or_else(|| fallback_row_from_dsa(dsa, metrics));
4923 let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
4924 let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;
4925 let best_all_features = cohort_summary
4926 .and_then(best_all_features_row)
4927 .cloned()
4928 .unwrap_or_else(|| fallback_row_from_dsa(dsa, metrics));
4929
4930 let primary_success_met = chosen.pass_run_nuisance_proxy < ewma_nuisance
4931 && chosen.failure_recall + RECALL_TOLERANCE >= threshold_recall;
4932 let secondary_lead_time_vs_ewma = paired_ge(
4933 chosen.mean_lead_time_runs,
4934 metrics.lead_time_summary.mean_ewma_lead_runs,
4935 );
4936 let secondary_lead_time_vs_threshold = paired_ge(
4937 chosen.mean_lead_time_runs,
4938 metrics.lead_time_summary.mean_threshold_lead_runs,
4939 );
4940 let secondary_precursor_quality_vs_all_feature_dsa = compare_option_gt(
4941 chosen.precursor_quality,
4942 best_all_features.precursor_quality,
4943 );
4944 let secondary_compression_material = chosen.compression_ratio.map(|ratio| ratio > 1.0);
4945 let secondary_targets_met = secondary_lead_time_vs_ewma && secondary_lead_time_vs_threshold;
4946
4947 let (achieved_forecast_score, forecast_justification) = if primary_success_met
4948 && secondary_targets_met
4949 {
4950 (
4951 FORECAST_PRIMARY_PLUS_SECONDARY,
4952 format!(
4953 "Primary success met for {}: nuisance {:.4} < EWMA {:.4}, recall {} >= threshold {} - {}. Mean lead {} is at least EWMA {} and threshold {}.",
4954 row_label(&chosen),
4955 chosen.pass_run_nuisance_proxy,
4956 ewma_nuisance,
4957 chosen.failure_recall,
4958 threshold_recall,
4959 RECALL_TOLERANCE,
4960 format_option_f64(chosen.mean_lead_time_runs),
4961 format_option_f64(metrics.lead_time_summary.mean_ewma_lead_runs),
4962 format_option_f64(metrics.lead_time_summary.mean_threshold_lead_runs),
4963 ),
4964 )
4965 } else if primary_success_met {
4966 (
4967 FORECAST_PRIMARY_ONLY,
4968 format!(
4969 "Primary success met for {}: nuisance {:.4} < EWMA {:.4}, recall {} >= threshold {} - {}. Mean lead {} does not meet both secondary lead-time targets.",
4970 row_label(&chosen),
4971 chosen.pass_run_nuisance_proxy,
4972 ewma_nuisance,
4973 chosen.failure_recall,
4974 threshold_recall,
4975 RECALL_TOLERANCE,
4976 format_option_f64(chosen.mean_lead_time_runs),
4977 ),
4978 )
4979 } else if chosen.pass_run_nuisance_proxy < ewma_nuisance {
4980 (
4981 FORECAST_RECALL_SHORTFALL_VALUE,
4982 format!(
4983 "Nuisance improved for {} ({:.4} < EWMA {:.4}) but recall {} is below threshold {} - {}.",
4984 row_label(&chosen),
4985 chosen.pass_run_nuisance_proxy,
4986 ewma_nuisance,
4987 chosen.failure_recall,
4988 threshold_recall,
4989 RECALL_TOLERANCE,
4990 ),
4991 )
4992 } else {
4993 (
4994 CURRENT_BASELINE_SCORE,
4995 format!(
4996 "Primary success condition not met for {}. Nuisance {:.4} vs EWMA {:.4}; recall {} vs threshold {} - {}.",
4997 row_label(&chosen),
4998 chosen.pass_run_nuisance_proxy,
4999 ewma_nuisance,
5000 chosen.failure_recall,
5001 threshold_recall,
5002 RECALL_TOLERANCE,
5003 ),
5004 )
5005 };
5006
5007 RatingDeltaForecast {
5008 current_baseline_score: CURRENT_BASELINE_SCORE,
5009 primary_success_condition: rating_primary_success_condition(),
5010 recall_tolerance_runs: RECALL_TOLERANCE,
5011 chosen_configuration: row_label(&chosen),
5012 primary_success_met,
5013 secondary_targets_met,
5014 secondary_lead_time_vs_ewma,
5015 secondary_lead_time_vs_threshold,
5016 secondary_precursor_quality_vs_all_feature_dsa,
5017 secondary_compression_material,
5018 forecast_score_if_primary_success_only: FORECAST_PRIMARY_ONLY,
5019 forecast_score_if_primary_plus_secondary_success: FORECAST_PRIMARY_PLUS_SECONDARY,
5020 achieved_forecast_score,
5021 forecast_justification,
5022 category_forecasts: build_category_forecasts(primary_success_met, secondary_targets_met),
5023 supporting_metrics: ForecastSupportingMetrics {
5024 chosen_configuration: row_label(&chosen),
5025 dsa_nuisance: chosen.pass_run_nuisance_proxy,
5026 ewma_nuisance,
5027 dsa_recall: chosen.failure_recall,
5028 threshold_recall,
5029 recall_tolerance_runs: RECALL_TOLERANCE,
5030 dsa_mean_lead_time_runs: chosen.mean_lead_time_runs,
5031 ewma_mean_lead_time_runs: metrics.lead_time_summary.mean_ewma_lead_runs,
5032 threshold_mean_lead_time_runs: metrics.lead_time_summary.mean_threshold_lead_runs,
5033 dsa_precursor_quality: chosen.precursor_quality,
5034 all_feature_dsa_precursor_quality: best_all_features.precursor_quality,
5035 dsa_compression_ratio: chosen.compression_ratio,
5036 all_feature_dsa_compression_ratio: best_all_features.compression_ratio,
5037 },
5038 }
5039}
5040
5041pub fn compute_rating_failure_analysis(
5042 dsa: &DsaEvaluation,
5043 metrics: &BenchmarkMetrics,
5044 cohort_summary: Option<&CohortDsaSummary>,
5045) -> Option<RatingDeltaFailureAnalysis> {
5046 let chosen = cohort_summary
5047 .and_then(|summary| summary.selected_configuration.as_ref())
5048 .cloned()
5049 .unwrap_or_else(|| fallback_row_from_dsa(dsa, metrics));
5050 let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
5051 let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;
5052 let primary_success_met = chosen.pass_run_nuisance_proxy < ewma_nuisance
5053 && chosen.failure_recall + RECALL_TOLERANCE >= threshold_recall;
5054 if primary_success_met {
5055 return None;
5056 }
5057
5058 let nuisance_gap = (chosen.pass_run_nuisance_proxy - ewma_nuisance).max(0.0);
5059 let recall_gap_runs = if chosen.failure_recall + RECALL_TOLERANCE >= threshold_recall {
5060 0
5061 } else {
5062 (threshold_recall - RECALL_TOLERANCE - chosen.failure_recall) as i64
5063 };
5064
5065 Some(RatingDeltaFailureAnalysis {
5066 closest_configuration: row_label(&chosen),
5067 dsa_nuisance: chosen.pass_run_nuisance_proxy,
5068 ewma_nuisance,
5069 dsa_recall: chosen.failure_recall,
5070 threshold_recall,
5071 recall_tolerance_runs: RECALL_TOLERANCE,
5072 nuisance_gap,
5073 recall_gap_runs,
5074 nuisance_missed_by: if nuisance_gap == 0.0 {
5075 "no miss; nuisance target was met".into()
5076 } else if nuisance_gap <= 0.01 {
5077 "small margin".into()
5078 } else {
5079 "large margin".into()
5080 },
5081 recall_preserved: recall_gap_runs == 0,
5082 limiting_factor: determine_rating_limiting_factor(
5083 cohort_summary,
5084 &chosen,
5085 ewma_nuisance,
5086 threshold_recall,
5087 ),
5088 })
5089}
5090
5091pub fn write_rating_failure_analysis_md(
5092 path: &Path,
5093 analysis: &RatingDeltaFailureAnalysis,
5094) -> Result<()> {
5095 let content = format!(
5096 "# DSA Rating Delta Failure Analysis\n\n\
5097 ## Closest near-success configuration\n\n\
5098 - Configuration: {}\n\
5099 - DSA nuisance: {:.6}\n\
5100 - EWMA nuisance: {:.6}\n\
5101 - DSA recall: {}\n\
5102 - Threshold recall: {}\n\
5103 - Recall tolerance: {} run(s)\n\n\
5104 ## Nuisance\n\n\
5105 - Gap from EWMA: {:.6}\n\
5106 - Missed by: {}\n\n\
5107 ## Recall\n\n\
5108 - Recall gap from threshold - tolerance: {}\n\
5109 - Recall preserved: {}\n\n\
5110 ## Limiting factor\n\n\
5111 {}\n",
5112 analysis.closest_configuration,
5113 analysis.dsa_nuisance,
5114 analysis.ewma_nuisance,
5115 analysis.dsa_recall,
5116 analysis.threshold_recall,
5117 analysis.recall_tolerance_runs,
5118 analysis.nuisance_gap,
5119 analysis.nuisance_missed_by,
5120 analysis.recall_gap_runs,
5121 analysis.recall_preserved,
5122 analysis.limiting_factor,
5123 );
5124 std::fs::write(path, content)?;
5125 Ok(())
5126}
5127
5128pub fn cohort_report_section(cohorts: &FeatureCohorts, summary: &CohortDsaSummary) -> String {
5129 let mut out = String::new();
5130 out.push_str("## Feature-Cohort DSA Selection\n\n");
5131 out.push_str(&format!(
5132 "- Ranking formula: `{}`\n- Missingness penalty: {:.1} when `missing_fraction > {:.2}`\n- Selected cohorts: top_4={}, top_8={}, top_16={}, all_features={}\n- Legacy one-run-tolerance cohort gate used inside the bounded sweep: {}\n- Full bounded cohort grid: `W in {{5,10,15}}`, `K in {{2,3,4}}`, `tau in {{2.0,2.5,3.0}}`, `m in {{1,2,3,5}}` where valid\n\n",
5133 summary.ranking_formula,
5134 cohorts.missingness_penalty_value,
5135 cohorts.missingness_penalty_threshold,
5136 cohorts.top_4.len(),
5137 cohorts.top_8.len(),
5138 cohorts.top_16.len(),
5139 cohorts.all_features.len(),
5140 summary.primary_success_condition,
5141 ));
5142
5143 out.push_str("### Seed-feature check\n\n");
5144 for seed in &cohorts.seed_feature_report {
5145 if seed.found_in_ranking {
5146 out.push_str(&format!(
5147 "- {}: rank {}, score {:.4}, top_4={}, top_8={}, top_16={}\n",
5148 seed.feature_name,
5149 seed.rank.unwrap_or(0),
5150 seed.candidate_score.unwrap_or(0.0),
5151 seed.in_top_4,
5152 seed.in_top_8,
5153 seed.in_top_16,
5154 ));
5155 } else {
5156 out.push_str(&format!(
5157 "- {}: not present in the analyzable-feature ranking\n",
5158 seed.feature_name,
5159 ));
5160 }
5161 }
5162 out.push('\n');
5163
5164 out.push_str("### Best row per cohort\n\n");
5165 out.push_str("| Cohort | W | K | tau | m | Recall | Mean lead | Nuisance | Episodes | Compression | Precursor quality | Legacy gate |\n");
5166 out.push_str("|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n");
5167 for best in &summary.best_by_cohort {
5168 let row = &best.best_row;
5169 out.push_str(&format!(
5170 "| {} | {} | {} | {:.1} | {} | {}/{} | {} | {:.4} | {} | {} | {} | {} |\n",
5171 row.cohort_name,
5172 row.window,
5173 row.persistence_runs,
5174 row.alert_tau,
5175 row.corroborating_m,
5176 row.failure_recall,
5177 row.failure_runs,
5178 format_option_f64(row.mean_lead_time_runs),
5179 row.pass_run_nuisance_proxy,
5180 row.dsa_episode_count,
5181 format_option_f64(row.compression_ratio),
5182 format_option_f64(row.precursor_quality),
5183 if row.primary_success { "yes" } else { "no" },
5184 ));
5185 }
5186 out.push('\n');
5187
5188 if let Some(selected) = &summary.selected_configuration {
5189 out.push_str("### Best cohort/grid result\n\n");
5190 out.push_str(&format!(
5191 "- Selected configuration: {}\n- Recall: {}/{}\n- Mean lead: {}\n- Median lead: {}\n- Nuisance: {:.4} versus EWMA {:.4}\n- Compression ratio: {}\n- Precursor quality: {}\n- Legacy one-run-tolerance cohort gate met: {}\n\n",
5192 row_label(selected),
5193 selected.failure_recall,
5194 selected.failure_runs,
5195 format_option_f64(selected.mean_lead_time_runs),
5196 format_option_f64(selected.median_lead_time_runs),
5197 selected.pass_run_nuisance_proxy,
5198 selected.ewma_nuisance,
5199 format_option_f64(selected.compression_ratio),
5200 format_option_f64(selected.precursor_quality),
5201 selected.primary_success,
5202 ));
5203 }
5204
5205 if let Some(failure_analysis) = &summary.failure_analysis {
5206 out.push_str("### Failure analysis\n\n");
5207 out.push_str(&format!(
5208 "- Closest cohort: {}\n- Closest grid point: {}\n- Limiting factor: {}\n- Corroboration effect: {}\n- Ranking quality: {}\n- All-feature vs cohort: {}\n- Best near-success source: {}\n\n",
5209 failure_analysis.closest_cohort,
5210 failure_analysis.closest_grid_point,
5211 failure_analysis.limiting_factor,
5212 failure_analysis.corroboration_effect,
5213 failure_analysis.ranking_quality_note,
5214 failure_analysis.all_feature_dsa_vs_cohort_note,
5215 failure_analysis.best_near_success_source,
5216 ));
5217 }
5218
5219 out.push_str("- Saved artifacts: `dsa_feature_ranking.csv`, `dsa_seed_feature_check.json`, `dsa_feature_cohorts.json`, `dsa_grid_results.csv`, `dsa_cohort_results.csv`, `dsa_cohort_summary.json`, `dsa_cohort_precursor_quality.csv`\n");
5220 if summary.failure_analysis.is_some() {
5221 out.push_str("- Failure-analysis artifact: `dsa_cohort_failure_analysis.md`\n");
5222 }
5223 out.push('\n');
5224 out
5225}
5226
5227pub fn rating_forecast_report_section(forecast: &RatingDeltaForecast) -> String {
5228 let mut out = String::new();
5229 out.push_str("## Rating Delta Forecast\n\n");
5230 out.push_str(&format!(
5231 "- Primary success condition: {}\n- Primary success met: {}\n- Chosen configuration: {}\n- Forecast score if primary success only: {:.1}\n- Forecast score if primary + secondary success: {:.1}\n- Forecast score under current measured result: {:.1}\n\n",
5232 forecast.primary_success_condition,
5233 forecast.primary_success_met,
5234 forecast.chosen_configuration,
5235 forecast.forecast_score_if_primary_success_only,
5236 forecast.forecast_score_if_primary_plus_secondary_success,
5237 forecast.achieved_forecast_score,
5238 ));
5239 out.push_str("*Forecast only. This is not an achieved score.*\n\n");
5240 out.push_str(&format!("{}\n\n", forecast.forecast_justification));
5241 out.push_str(&format!(
5242 "- DSA nuisance: {:.6}\n- EWMA nuisance: {:.6}\n- DSA recall: {}\n- Threshold recall: {}\n- Recall tolerance: {} run(s)\n- DSA mean lead: {}\n- EWMA mean lead: {}\n- Threshold mean lead: {}\n- DSA precursor quality: {}\n- All-feature DSA precursor quality: {}\n- DSA compression ratio: {}\n- All-feature DSA compression ratio: {}\n\n",
5243 forecast.supporting_metrics.dsa_nuisance,
5244 forecast.supporting_metrics.ewma_nuisance,
5245 forecast.supporting_metrics.dsa_recall,
5246 forecast.supporting_metrics.threshold_recall,
5247 forecast.supporting_metrics.recall_tolerance_runs,
5248 format_option_f64(forecast.supporting_metrics.dsa_mean_lead_time_runs),
5249 format_option_f64(forecast.supporting_metrics.ewma_mean_lead_time_runs),
5250 format_option_f64(forecast.supporting_metrics.threshold_mean_lead_time_runs),
5251 format_option_f64(forecast.supporting_metrics.dsa_precursor_quality),
5252 format_option_f64(forecast.supporting_metrics.all_feature_dsa_precursor_quality),
5253 format_option_f64(forecast.supporting_metrics.dsa_compression_ratio),
5254 format_option_f64(forecast.supporting_metrics.all_feature_dsa_compression_ratio),
5255 ));
5256 out
5257}
5258
5259fn build_grid_row(
5260 grid_row_id: usize,
5261 feature_trace_config_id: usize,
5262 ranking_strategy: &str,
5263 ranking_formula: &str,
5264 cohort_name: &str,
5265 cohort_size: usize,
5266 config: &DsaConfig,
5267 corroborating_m: usize,
5268 dataset: &PreparedDataset,
5269 evaluation: &DsaEvaluation,
5270 metrics: &BenchmarkMetrics,
5271) -> CohortGridResult {
5272 let feature_level_active_points = evaluation
5273 .traces
5274 .iter()
5275 .map(|trace| trace.dsa_active.iter().filter(|flag| **flag).count())
5276 .sum::<usize>();
5277 let feature_level_alert_points = evaluation
5278 .traces
5279 .iter()
5280 .map(|trace| trace.dsa_alert.iter().filter(|flag| **flag).count())
5281 .sum::<usize>();
5282 let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;
5283 let ewma_recall = metrics.summary.failure_runs_with_preceding_ewma_signal;
5284 let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
5285 let threshold_nuisance = metrics.summary.pass_run_threshold_nuisance_rate;
5286 let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
5287 let review_escalate_points_per_pass_run =
5288 review_escalate_points_per_pass_run(dataset, evaluation);
5289 let numeric_alert_points_per_pass_run = numeric_alert_points_per_pass_run(dataset, evaluation);
5290 let review_escalate_episodes_per_pass_run =
5291 review_escalate_episodes_per_pass_run(dataset, evaluation);
5292 let numeric_alert_episodes_per_pass_run =
5293 numeric_alert_episodes_per_pass_run(dataset, evaluation);
5294 let primary_success = evaluation.summary.pass_run_nuisance_proxy < ewma_nuisance
5295 && evaluation.summary.failure_run_recall + RECALL_TOLERANCE >= threshold_recall;
5296
5297 CohortGridResult {
5298 ranking_strategy: ranking_strategy.to_string(),
5299 ranking_formula: ranking_formula.to_string(),
5300 grid_row_id,
5301 feature_trace_config_id,
5302 cohort_name: cohort_name.to_string(),
5303 cohort_size,
5304 window: config.window,
5305 persistence_runs: config.persistence_runs,
5306 alert_tau: config.alert_tau,
5307 corroborating_m,
5308 primary_run_signal: evaluation.run_signals.primary_run_signal.clone(),
5309 failure_recall: evaluation.summary.failure_run_recall,
5310 failure_runs: evaluation.summary.failure_runs,
5311 failure_recall_rate: evaluation.summary.failure_run_recall_rate,
5312 threshold_recall,
5313 ewma_recall,
5314 failure_recall_delta_vs_threshold: evaluation
5315 .comparison_summary
5316 .failure_recall_delta_vs_threshold,
5317 failure_recall_delta_vs_ewma: evaluation.comparison_summary.failure_recall_delta_vs_ewma,
5318 mean_lead_time_runs: evaluation.summary.mean_lead_time_runs,
5319 median_lead_time_runs: evaluation.summary.median_lead_time_runs,
5320 threshold_mean_lead_time_runs: metrics.lead_time_summary.mean_threshold_lead_runs,
5321 ewma_mean_lead_time_runs: metrics.lead_time_summary.mean_ewma_lead_runs,
5322 mean_lead_delta_vs_threshold_runs: evaluation.summary.mean_lead_delta_vs_threshold_runs,
5323 mean_lead_delta_vs_ewma_runs: evaluation.summary.mean_lead_delta_vs_ewma_runs,
5324 pass_run_nuisance_proxy: evaluation.summary.pass_run_nuisance_proxy,
5325 numeric_pass_run_nuisance_proxy: evaluation.summary.numeric_primary_pass_run_nuisance_proxy,
5326 ewma_nuisance,
5327 threshold_nuisance,
5328 pass_run_nuisance_delta_vs_ewma: evaluation.summary.pass_run_nuisance_proxy - ewma_nuisance,
5329 pass_run_nuisance_delta_vs_threshold: evaluation.summary.pass_run_nuisance_proxy
5330 - threshold_nuisance,
5331 pass_run_nuisance_delta_vs_numeric_dsa: evaluation
5332 .comparison_summary
5333 .pass_run_nuisance_delta_vs_numeric_dsa,
5334 raw_boundary_episode_count: evaluation.episode_summary.raw_boundary_episode_count,
5335 dsa_episode_count: evaluation.episode_summary.dsa_episode_count,
5336 dsa_episodes_preceding_failure: evaluation.episode_summary.dsa_episodes_preceding_failure,
5337 mean_dsa_episode_length_runs: evaluation.episode_summary.mean_dsa_episode_length_runs,
5338 max_dsa_episode_length_runs: evaluation.episode_summary.max_dsa_episode_length_runs,
5339 compression_ratio: evaluation.episode_summary.compression_ratio,
5340 precursor_quality: evaluation.episode_summary.precursor_quality,
5341 non_escalating_dsa_episode_fraction: evaluation
5342 .episode_summary
5343 .non_escalating_dsa_episode_fraction,
5344 feature_level_active_points,
5345 feature_level_alert_points,
5346 persistence_suppression_fraction: if feature_level_active_points == 0 {
5347 None
5348 } else {
5349 Some(1.0 - feature_level_alert_points as f64 / feature_level_active_points as f64)
5350 },
5351 numeric_failure_recall: evaluation.summary.numeric_primary_failure_run_recall,
5352 policy_vs_numeric_recall_delta: evaluation
5353 .comparison_summary
5354 .policy_vs_numeric_recall_delta,
5355 watch_point_count: evaluation.summary.watch_point_count,
5356 review_point_count: evaluation.summary.review_point_count,
5357 escalate_point_count: evaluation.summary.escalate_point_count,
5358 investigation_point_count: evaluation.summary.alert_point_count,
5359 numeric_investigation_point_count: evaluation.summary.numeric_alert_point_count,
5360 silenced_point_count: evaluation.summary.silenced_point_count,
5361 rescued_point_count: evaluation.summary.rescued_point_count,
5362 rescued_watch_to_review_points: evaluation.summary.rescued_watch_to_review_points,
5363 rescued_review_to_escalate_points: evaluation.summary.rescued_review_to_escalate_points,
5364 review_escalate_points_per_pass_run: if pass_run_count == 0 {
5365 0.0
5366 } else {
5367 review_escalate_points_per_pass_run
5368 },
5369 numeric_alert_points_per_pass_run: if pass_run_count == 0 {
5370 0.0
5371 } else {
5372 numeric_alert_points_per_pass_run
5373 },
5374 review_escalate_episodes_per_pass_run: if pass_run_count == 0 {
5375 0.0
5376 } else {
5377 review_escalate_episodes_per_pass_run
5378 },
5379 numeric_alert_episodes_per_pass_run: if pass_run_count == 0 {
5380 0.0
5381 } else {
5382 numeric_alert_episodes_per_pass_run
5383 },
5384 primary_success,
5385 primary_success_reason: primary_success_reason(
5386 evaluation.summary.failure_run_recall,
5387 threshold_recall,
5388 evaluation.summary.pass_run_nuisance_proxy,
5389 ewma_nuisance,
5390 ),
5391 }
5392}
5393
5394fn build_motif_policy_rows(
5395 row: &CohortGridResult,
5396 evaluation: &DsaEvaluation,
5397) -> Vec<CohortMotifPolicyContributionRow> {
5398 evaluation
5399 .motif_policy_contributions
5400 .iter()
5401 .map(|contribution| CohortMotifPolicyContributionRow {
5402 grid_row_id: row.grid_row_id,
5403 cohort_name: row.cohort_name.clone(),
5404 cohort_size: row.cohort_size,
5405 window: row.window,
5406 persistence_runs: row.persistence_runs,
5407 alert_tau: row.alert_tau,
5408 corroborating_m: row.corroborating_m,
5409 motif_name: contribution.motif_name.clone(),
5410 alert_class_default: contribution.alert_class_default,
5411 watch_points: contribution.watch_points,
5412 review_points: contribution.review_points,
5413 escalate_points: contribution.escalate_points,
5414 silent_suppression_points: contribution.silent_suppression_points,
5415 pass_review_or_escalate_points: contribution.pass_review_or_escalate_points,
5416 pre_failure_review_or_escalate_points: contribution
5417 .pre_failure_review_or_escalate_points,
5418 })
5419 .collect()
5420}
5421
5422fn build_best_by_cohort(rows: &[CohortGridResult]) -> Vec<CohortBestRow> {
5423 let mut grouped = BTreeMap::<String, Vec<CohortGridResult>>::new();
5424 for row in rows {
5425 grouped
5426 .entry(format!("{} [{}]", row.cohort_name, row.ranking_strategy))
5427 .or_default()
5428 .push(row.clone());
5429 }
5430 grouped
5431 .into_iter()
5432 .filter_map(|(cohort_name, cohort_rows)| {
5433 best_row(&cohort_rows).map(|best_row| CohortBestRow {
5434 cohort_name,
5435 best_row,
5436 })
5437 })
5438 .collect()
5439}
5440
5441fn best_row(rows: &[CohortGridResult]) -> Option<CohortGridResult> {
5442 let success_rows = rows
5443 .iter()
5444 .filter(|row| row.primary_success)
5445 .cloned()
5446 .collect::<Vec<_>>();
5447 if !success_rows.is_empty() {
5448 return success_rows.into_iter().min_by(compare_successful_rows);
5449 }
5450 choose_closest_to_success(rows)
5451}
5452
5453fn choose_closest_to_success(rows: &[CohortGridResult]) -> Option<CohortGridResult> {
5454 rows.iter().cloned().min_by(|left, right| {
5455 primary_success_gap(left)
5456 .partial_cmp(&primary_success_gap(right))
5457 .unwrap_or(Ordering::Equal)
5458 .then_with(|| compare_successful_rows(left, right))
5459 })
5460}
5461
5462fn compare_successful_rows(left: &CohortGridResult, right: &CohortGridResult) -> Ordering {
5463 left.pass_run_nuisance_proxy
5464 .partial_cmp(&right.pass_run_nuisance_proxy)
5465 .unwrap_or(Ordering::Equal)
5466 .then_with(|| right.failure_recall.cmp(&left.failure_recall))
5467 .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
5468 .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
5469 .then_with(|| compare_option_f64(right.compression_ratio, left.compression_ratio))
5470 .then_with(|| left.cohort_name.cmp(&right.cohort_name))
5471 .then_with(|| left.window.cmp(&right.window))
5472 .then_with(|| left.persistence_runs.cmp(&right.persistence_runs))
5473 .then_with(|| left.corroborating_m.cmp(&right.corroborating_m))
5474}
5475
5476fn primary_success_gap(row: &CohortGridResult) -> f64 {
5477 let nuisance_gap = (row.pass_run_nuisance_proxy - row.ewma_nuisance).max(0.0);
5478 let recall_floor = row.threshold_recall.saturating_sub(RECALL_TOLERANCE);
5479 let recall_gap =
5480 recall_floor.saturating_sub(row.failure_recall) as f64 / row.threshold_recall.max(1) as f64;
5481 nuisance_gap + recall_gap
5482}
5483
5484fn corroboration_effect(rows: &[CohortGridResult]) -> String {
5485 let best_m1 = rows
5486 .iter()
5487 .filter(|row| row.corroborating_m == 1)
5488 .min_by(|left, right| {
5489 primary_success_gap(left)
5490 .partial_cmp(&primary_success_gap(right))
5491 .unwrap_or(Ordering::Equal)
5492 });
5493 let best_m_gt_1 = rows
5494 .iter()
5495 .filter(|row| row.corroborating_m > 1)
5496 .min_by(|left, right| {
5497 primary_success_gap(left)
5498 .partial_cmp(&primary_success_gap(right))
5499 .unwrap_or(Ordering::Equal)
5500 });
5501 match (best_m1, best_m_gt_1) {
5502 (Some(best_m1), Some(best_m_gt_1)) => {
5503 let m1_gap = primary_success_gap(best_m1);
5504 let higher_gap = primary_success_gap(best_m_gt_1);
5505 if higher_gap + 1.0e-9 < m1_gap {
5506 format!(
5507 "Cross-feature corroboration improved the closest result: {} beat {} with gap {:.4} vs {:.4}.",
5508 row_label(best_m_gt_1),
5509 row_label(best_m1),
5510 higher_gap,
5511 m1_gap,
5512 )
5513 } else if m1_gap + 1.0e-9 < higher_gap {
5514 format!(
5515 "Cross-feature corroboration degraded the closest result: {} beat {} with gap {:.4} vs {:.4}.",
5516 row_label(best_m1),
5517 row_label(best_m_gt_1),
5518 m1_gap,
5519 higher_gap,
5520 )
5521 } else {
5522 "Cross-feature corroboration produced effectively tied nuisance/recall trade-offs."
5523 .to_string()
5524 }
5525 }
5526 _ => "Cross-feature corroboration effect could not be separated from the saved sweep."
5527 .to_string(),
5528 }
5529}
5530
5531fn limiting_factor_from_row(
5532 row: Option<&CohortGridResult>,
5533 ewma_nuisance: f64,
5534 threshold_recall: usize,
5535) -> String {
5536 let Some(row) = row else {
5537 return "No cohort row was available for limiting-factor analysis.".into();
5538 };
5539 let nuisance_ok = row.pass_run_nuisance_proxy < ewma_nuisance;
5540 let recall_ok = row.failure_recall + RECALL_TOLERANCE >= threshold_recall;
5541 match (nuisance_ok, recall_ok) {
5542 (false, true) => "Nuisance was the limiting factor.".into(),
5543 (true, false) => "Recall was the limiting factor.".into(),
5544 (false, false) => "Both nuisance and recall remained limiting factors.".into(),
5545 (true, true) => "The legacy one-run-tolerance cohort gate was met on this row.".into(),
5546 }
5547}
5548
5549fn build_failure_analysis(
5550 rows: &[CohortGridResult],
5551 motif_policy_rows: &[CohortMotifPolicyContributionRow],
5552 cohorts: &FeatureCohorts,
5553 ewma_nuisance: f64,
5554 threshold_recall: usize,
5555 selected_row: Option<&CohortGridResult>,
5556 corroboration_effect: &str,
5557 limiting_factor: &str,
5558) -> Option<CohortFailureAnalysis> {
5559 let closest = choose_closest_to_success(rows)?;
5560 let best_all_features = rows
5561 .iter()
5562 .filter(|row| row.cohort_name == "all_features")
5563 .cloned()
5564 .collect::<Vec<_>>();
5565 let best_ranked = rows
5566 .iter()
5567 .filter(|row| row.cohort_name != "all_features")
5568 .cloned()
5569 .collect::<Vec<_>>();
5570 let best_all_features = best_row(&best_all_features);
5571 let best_ranked = best_row(&best_ranked);
5572 let all_feature_dsa_vs_cohort_note = match (&best_all_features, &best_ranked) {
5573 (Some(best_all_features), Some(best_ranked)) => {
5574 let all_gap = primary_success_gap(best_all_features);
5575 let ranked_gap = primary_success_gap(best_ranked);
5576 if ranked_gap + 1.0e-9 < all_gap {
5577 format!(
5578 "Ranked cohort DSA was better than all-feature DSA: {} beat {}.",
5579 row_label(best_ranked),
5580 row_label(best_all_features),
5581 )
5582 } else if all_gap + 1.0e-9 < ranked_gap {
5583 format!(
5584 "All-feature DSA remained better than the ranked cohorts: {} beat {}.",
5585 row_label(best_all_features),
5586 row_label(best_ranked),
5587 )
5588 } else {
5589 "All-feature DSA and the best ranked cohort were effectively tied.".into()
5590 }
5591 }
5592 _ => {
5593 "Not enough saved cohort rows to compare all-feature DSA against ranked cohorts.".into()
5594 }
5595 };
5596
5597 let ranking_reference = best_ranked
5598 .as_ref()
5599 .map(|row| row.cohort_name.clone())
5600 .unwrap_or_else(|| closest.cohort_name.clone());
5601 let ranking_quality_note = ranking_quality_note(cohorts, &ranking_reference);
5602 let best_near_success_source = selected_row
5603 .map(row_label)
5604 .unwrap_or_else(|| row_label(&closest));
5605 let policy_vs_numeric_note = policy_vs_numeric_note(&closest);
5606 let nuisance_motif_classes = dominant_motif_note(motif_policy_rows, closest.grid_row_id, true);
5607 let useful_precursor_motif_classes =
5608 dominant_motif_note(motif_policy_rows, closest.grid_row_id, false);
5609
5610 Some(CohortFailureAnalysis {
5611 closest_cohort: closest.cohort_name.clone(),
5612 closest_grid_point: row_grid_point(&closest),
5613 closest_policy_setting: row_label(&closest),
5614 closest_nuisance: closest.pass_run_nuisance_proxy,
5615 closest_recall: closest.failure_recall,
5616 ewma_nuisance,
5617 threshold_recall,
5618 limiting_factor: limiting_factor.to_string(),
5619 corroboration_effect: corroboration_effect.to_string(),
5620 policy_vs_numeric_note,
5621 ranking_quality_note,
5622 all_feature_dsa_vs_cohort_note,
5623 best_near_success_source,
5624 nuisance_motif_classes,
5625 useful_precursor_motif_classes,
5626 })
5627}
5628
5629fn policy_vs_numeric_note(row: &CohortGridResult) -> String {
5630 if row.pass_run_nuisance_delta_vs_numeric_dsa < 0.0 && row.policy_vs_numeric_recall_delta >= 0 {
5631 format!(
5632 "Policy suppression helped relative to numeric-only DSA: nuisance improved from {:.4} to {:.4} without recall loss ({} to {}).",
5633 row.numeric_pass_run_nuisance_proxy,
5634 row.pass_run_nuisance_proxy,
5635 row.numeric_failure_recall,
5636 row.failure_recall,
5637 )
5638 } else if row.pass_run_nuisance_delta_vs_numeric_dsa < 0.0 {
5639 format!(
5640 "Policy suppression reduced nuisance relative to numeric-only DSA ({:.4} to {:.4}) but lost recall ({} to {}).",
5641 row.numeric_pass_run_nuisance_proxy,
5642 row.pass_run_nuisance_proxy,
5643 row.numeric_failure_recall,
5644 row.failure_recall,
5645 )
5646 } else if row.pass_run_nuisance_delta_vs_numeric_dsa > 0.0 {
5647 format!(
5648 "Policy suppression hurt nuisance relative to numeric-only DSA: {:.4} vs {:.4}.",
5649 row.pass_run_nuisance_proxy, row.numeric_pass_run_nuisance_proxy,
5650 )
5651 } else {
5652 "Policy suppression and numeric-only DSA were effectively tied on pass-run nuisance.".into()
5653 }
5654}
5655
5656fn dominant_motif_note(
5657 motif_policy_rows: &[CohortMotifPolicyContributionRow],
5658 grid_row_id: usize,
5659 nuisance: bool,
5660) -> String {
5661 let mut rows = motif_policy_rows
5662 .iter()
5663 .filter(|row| row.grid_row_id == grid_row_id)
5664 .collect::<Vec<_>>();
5665 if rows.is_empty() {
5666 return "No motif-policy contribution rows were available.".into();
5667 }
5668 rows.sort_by(|left, right| {
5669 let left_score = if nuisance {
5670 left.pass_review_or_escalate_points
5671 } else {
5672 left.pre_failure_review_or_escalate_points
5673 };
5674 let right_score = if nuisance {
5675 right.pass_review_or_escalate_points
5676 } else {
5677 right.pre_failure_review_or_escalate_points
5678 };
5679 right_score
5680 .cmp(&left_score)
5681 .then_with(|| left.motif_name.cmp(&right.motif_name))
5682 });
5683 let top = rows[0];
5684 let score = if nuisance {
5685 top.pass_review_or_escalate_points
5686 } else {
5687 top.pre_failure_review_or_escalate_points
5688 };
5689 if nuisance {
5690 format!(
5691 "{} ({:?}) contributed the most pass-run Review/Escalate points: {}.",
5692 top.motif_name, top.alert_class_default, score
5693 )
5694 } else {
5695 format!(
5696 "{} ({:?}) contributed the most pre-failure Review/Escalate points: {}.",
5697 top.motif_name, top.alert_class_default, score
5698 )
5699 }
5700}
5701
5702fn ranking_quality_note(cohorts: &FeatureCohorts, cohort_name: &str) -> String {
5703 let selected = cohort_members(cohorts, cohort_name);
5704 if selected.is_empty() {
5705 return "Ranking quality could not be assessed because the selected cohort was empty."
5706 .to_string();
5707 }
5708
5709 let selected_violation_ratio = average_ratio(
5710 selected,
5711 |member| member.dsfb_violation_points,
5712 |member| member.dsfb_boundary_points,
5713 );
5714 let selected_threshold_ratio = average_ratio(
5715 selected,
5716 |member| member.threshold_alarm_points,
5717 |member| member.dsfb_boundary_points,
5718 );
5719 let all_violation_ratio = average_ratio(
5720 &cohorts.all_features,
5721 |member| member.dsfb_violation_points,
5722 |member| member.dsfb_boundary_points,
5723 );
5724 let all_threshold_ratio = average_ratio(
5725 &cohorts.all_features,
5726 |member| member.threshold_alarm_points,
5727 |member| member.dsfb_boundary_points,
5728 );
5729
5730 if selected_violation_ratio > all_violation_ratio * 1.25
5731 || selected_threshold_ratio > all_threshold_ratio * 1.25
5732 {
5733 format!(
5734 "Ranking appears to have over-selected noisy features: cohort violation/boundary ratio {:.4} vs all-feature {:.4}, threshold/boundary ratio {:.4} vs all-feature {:.4}.",
5735 selected_violation_ratio,
5736 all_violation_ratio,
5737 selected_threshold_ratio,
5738 all_threshold_ratio,
5739 )
5740 } else {
5741 format!(
5742 "Ranking did not obviously over-select noisy features: cohort violation/boundary ratio {:.4} vs all-feature {:.4}, threshold/boundary ratio {:.4} vs all-feature {:.4}.",
5743 selected_violation_ratio,
5744 all_violation_ratio,
5745 selected_threshold_ratio,
5746 all_threshold_ratio,
5747 )
5748 }
5749}
5750
5751fn rebuild_selected_evaluation(
5752 dataset: &PreparedDataset,
5753 nominal: &NominalModel,
5754 residuals: &ResidualSet,
5755 signs: &SignSet,
5756 baselines: &BaselineSet,
5757 grammar: &GrammarSet,
5758 cohorts: &FeatureCohorts,
5759 pre_failure_lookback_runs: usize,
5760 row: &CohortGridResult,
5761) -> Result<DsaEvaluation> {
5762 let base_config = DsaConfig {
5763 window: row.window,
5764 persistence_runs: row.persistence_runs,
5765 alert_tau: row.alert_tau,
5766 corroborating_feature_count_min: 1,
5767 };
5768 let base_evaluation = evaluate_dsa(
5769 dataset,
5770 nominal,
5771 residuals,
5772 signs,
5773 baselines,
5774 grammar,
5775 &base_config,
5776 pre_failure_lookback_runs,
5777 )?;
5778 let feature_indices = cohort_members(cohorts, &row.cohort_name)
5779 .iter()
5780 .map(|member| member.feature_index)
5781 .collect::<Vec<_>>();
5782 project_dsa_to_cohort(
5783 dataset,
5784 nominal,
5785 residuals,
5786 baselines,
5787 grammar,
5788 &base_evaluation,
5789 &feature_indices,
5790 row.corroborating_m,
5791 pre_failure_lookback_runs,
5792 &row.cohort_name,
5793 )
5794}
5795
5796fn fallback_row_from_dsa(dsa: &DsaEvaluation, metrics: &BenchmarkMetrics) -> CohortGridResult {
5797 CohortGridResult {
5798 ranking_strategy: "selected".into(),
5799 ranking_formula: "selected evaluation".into(),
5800 grid_row_id: 0,
5801 feature_trace_config_id: 0,
5802 cohort_name: "default_all_features".into(),
5803 cohort_size: dsa.summary.analyzable_feature_count,
5804 window: dsa.summary.config.window,
5805 persistence_runs: dsa.summary.config.persistence_runs,
5806 alert_tau: dsa.summary.config.alert_tau,
5807 corroborating_m: dsa.summary.config.corroborating_feature_count_min,
5808 primary_run_signal: dsa.run_signals.primary_run_signal.clone(),
5809 failure_recall: dsa.summary.failure_run_recall,
5810 failure_runs: dsa.summary.failure_runs,
5811 failure_recall_rate: dsa.summary.failure_run_recall_rate,
5812 threshold_recall: metrics.summary.failure_runs_with_preceding_threshold_signal,
5813 ewma_recall: metrics.summary.failure_runs_with_preceding_ewma_signal,
5814 failure_recall_delta_vs_threshold: dsa.comparison_summary.failure_recall_delta_vs_threshold,
5815 failure_recall_delta_vs_ewma: dsa.comparison_summary.failure_recall_delta_vs_ewma,
5816 mean_lead_time_runs: dsa.summary.mean_lead_time_runs,
5817 median_lead_time_runs: dsa.summary.median_lead_time_runs,
5818 threshold_mean_lead_time_runs: metrics.lead_time_summary.mean_threshold_lead_runs,
5819 ewma_mean_lead_time_runs: metrics.lead_time_summary.mean_ewma_lead_runs,
5820 mean_lead_delta_vs_threshold_runs: dsa.summary.mean_lead_delta_vs_threshold_runs,
5821 mean_lead_delta_vs_ewma_runs: dsa.summary.mean_lead_delta_vs_ewma_runs,
5822 pass_run_nuisance_proxy: dsa.summary.pass_run_nuisance_proxy,
5823 numeric_pass_run_nuisance_proxy: dsa.summary.numeric_primary_pass_run_nuisance_proxy,
5824 ewma_nuisance: metrics.summary.pass_run_ewma_nuisance_rate,
5825 threshold_nuisance: metrics.summary.pass_run_threshold_nuisance_rate,
5826 pass_run_nuisance_delta_vs_ewma: dsa.comparison_summary.pass_run_nuisance_delta_vs_ewma,
5827 pass_run_nuisance_delta_vs_threshold: dsa
5828 .comparison_summary
5829 .pass_run_nuisance_delta_vs_threshold,
5830 pass_run_nuisance_delta_vs_numeric_dsa: dsa
5831 .comparison_summary
5832 .pass_run_nuisance_delta_vs_numeric_dsa,
5833 raw_boundary_episode_count: dsa.episode_summary.raw_boundary_episode_count,
5834 dsa_episode_count: dsa.episode_summary.dsa_episode_count,
5835 dsa_episodes_preceding_failure: dsa.episode_summary.dsa_episodes_preceding_failure,
5836 mean_dsa_episode_length_runs: dsa.episode_summary.mean_dsa_episode_length_runs,
5837 max_dsa_episode_length_runs: dsa.episode_summary.max_dsa_episode_length_runs,
5838 compression_ratio: dsa.episode_summary.compression_ratio,
5839 precursor_quality: dsa.episode_summary.precursor_quality,
5840 non_escalating_dsa_episode_fraction: dsa
5841 .episode_summary
5842 .non_escalating_dsa_episode_fraction,
5843 feature_level_active_points: dsa
5844 .traces
5845 .iter()
5846 .map(|trace| trace.dsa_active.iter().filter(|flag| **flag).count())
5847 .sum(),
5848 feature_level_alert_points: dsa
5849 .traces
5850 .iter()
5851 .map(|trace| trace.dsa_alert.iter().filter(|flag| **flag).count())
5852 .sum(),
5853 persistence_suppression_fraction: overall_persistence_suppression_fraction(dsa),
5854 numeric_failure_recall: dsa.summary.numeric_primary_failure_run_recall,
5855 policy_vs_numeric_recall_delta: dsa.comparison_summary.policy_vs_numeric_recall_delta,
5856 watch_point_count: dsa.summary.watch_point_count,
5857 review_point_count: dsa.summary.review_point_count,
5858 escalate_point_count: dsa.summary.escalate_point_count,
5859 investigation_point_count: dsa.summary.alert_point_count,
5860 numeric_investigation_point_count: dsa.summary.numeric_alert_point_count,
5861 silenced_point_count: dsa.summary.silenced_point_count,
5862 rescued_point_count: dsa.summary.rescued_point_count,
5863 rescued_watch_to_review_points: dsa.summary.rescued_watch_to_review_points,
5864 rescued_review_to_escalate_points: dsa.summary.rescued_review_to_escalate_points,
5865 review_escalate_points_per_pass_run: 0.0,
5866 numeric_alert_points_per_pass_run: 0.0,
5867 review_escalate_episodes_per_pass_run: 0.0,
5868 numeric_alert_episodes_per_pass_run: 0.0,
5869 primary_success: dsa.summary.pass_run_nuisance_proxy
5870 < metrics.summary.pass_run_ewma_nuisance_rate
5871 && dsa.summary.failure_run_recall + RECALL_TOLERANCE
5872 >= metrics.summary.failure_runs_with_preceding_threshold_signal,
5873 primary_success_reason: primary_success_reason(
5874 dsa.summary.failure_run_recall,
5875 metrics.summary.failure_runs_with_preceding_threshold_signal,
5876 dsa.summary.pass_run_nuisance_proxy,
5877 metrics.summary.pass_run_ewma_nuisance_rate,
5878 ),
5879 }
5880}
5881
5882fn best_all_features_row(summary: &CohortDsaSummary) -> Option<&CohortGridResult> {
5883 summary
5884 .best_by_cohort
5885 .iter()
5886 .find(|best| best.cohort_name.starts_with("all_features"))
5887 .map(|best| &best.best_row)
5888}
5889
5890fn determine_rating_limiting_factor(
5891 cohort_summary: Option<&CohortDsaSummary>,
5892 chosen: &CohortGridResult,
5893 ewma_nuisance: f64,
5894 threshold_recall: usize,
5895) -> String {
5896 if let Some(summary) = cohort_summary {
5897 let best_all_features = best_all_features_row(summary);
5898 let best_ranked = summary
5899 .best_by_cohort
5900 .iter()
5901 .filter(|best| best.cohort_name != "all_features")
5902 .map(|best| &best.best_row)
5903 .min_by(|left, right| {
5904 primary_success_gap(left)
5905 .partial_cmp(&primary_success_gap(right))
5906 .unwrap_or(Ordering::Equal)
5907 });
5908 if let (Some(best_all_features), Some(best_ranked)) = (best_all_features, best_ranked) {
5909 if primary_success_gap(best_all_features) + 1.0e-9 < primary_success_gap(best_ranked) {
5910 return format!(
5911 "cohort selection: {} stayed closer to the nuisance/recall target than {}",
5912 row_label(best_all_features),
5913 row_label(best_ranked),
5914 );
5915 }
5916 }
5917
5918 let same_cohort_rows = summary
5919 .cohort_results
5920 .iter()
5921 .filter(|row| row.cohort_name == chosen.cohort_name)
5922 .collect::<Vec<_>>();
5923 let any_recall_ok = same_cohort_rows
5924 .iter()
5925 .any(|row| row.failure_recall + RECALL_TOLERANCE >= threshold_recall);
5926 let any_nuisance_ok = same_cohort_rows
5927 .iter()
5928 .any(|row| row.pass_run_nuisance_proxy < ewma_nuisance);
5929 let any_joint_success = same_cohort_rows.iter().any(|row| row.primary_success);
5930 if any_recall_ok && any_nuisance_ok && !any_joint_success {
5931 return format!(
5932 "corroboration threshold: cohort {} required different m values to satisfy recall and nuisance separately, but no single corroboration count satisfied both",
5933 chosen.cohort_name,
5934 );
5935 }
5936 }
5937
5938 if let Some(persistence_suppression_fraction) = chosen.persistence_suppression_fraction {
5939 if persistence_suppression_fraction > 0.25
5940 && chosen.failure_recall + RECALL_TOLERANCE < threshold_recall
5941 {
5942 return format!(
5943 "persistence gate: {:.1}% of feature-level active points were suppressed before alert emission in {}",
5944 persistence_suppression_fraction * 100.0,
5945 row_label(chosen),
5946 );
5947 }
5948 }
5949
5950 format!(
5951 "DSA score composition: even the closest configuration ({}) left nuisance {:.4} vs EWMA {:.4} and recall {} vs threshold {} - {}",
5952 row_label(chosen),
5953 chosen.pass_run_nuisance_proxy,
5954 ewma_nuisance,
5955 chosen.failure_recall,
5956 threshold_recall,
5957 RECALL_TOLERANCE,
5958 )
5959}
5960
5961fn build_category_forecasts(
5962 primary_success_met: bool,
5963 secondary_targets_met: bool,
5964) -> Vec<CategoryForecast> {
5965 if primary_success_met && secondary_targets_met {
5966 vec![
5967 CategoryForecast {
5968 category: "empirical_rigor".into(),
5969 current: "strong".into(),
5970 forecast: "strong".into(),
5971 justification:
5972 "Measured DSA nuisance reduction with recall preservation and lead-time parity strengthens the empirical package."
5973 .into(),
5974 },
5975 CategoryForecast {
5976 category: "operator_usefulness".into(),
5977 current: "moderate".into(),
5978 forecast: "strong".into(),
5979 justification:
5980 "Operator-facing nuisance fell below EWMA while recall stayed near threshold level."
5981 .into(),
5982 },
5983 CategoryForecast {
5984 category: "sbir_readiness".into(),
5985 current: "moderate".into(),
5986 forecast: "strong".into(),
5987 justification:
5988 "A concrete DSA win over scalar monitoring baselines improves commercialization credibility."
5989 .into(),
5990 },
5991 CategoryForecast {
5992 category: "licensing_readiness".into(),
5993 current: "moderate".into(),
5994 forecast: "moderate-strong".into(),
5995 justification:
5996 "Measured operator value supports licensing discussions, while evidence remains bounded to the current benchmark."
5997 .into(),
5998 },
5999 CategoryForecast {
6000 category: "paper_readiness".into(),
6001 current: "moderate".into(),
6002 forecast: "strong".into(),
6003 justification:
6004 "Feature-cohort DSA would add a concrete positive empirical result to the paper narrative."
6005 .into(),
6006 },
6007 ]
6008 } else if primary_success_met {
6009 vec![
6010 CategoryForecast {
6011 category: "empirical_rigor".into(),
6012 current: "strong".into(),
6013 forecast: "strong".into(),
6014 justification:
6015 "Primary success is still a hard empirical result even without full lead-time improvement."
6016 .into(),
6017 },
6018 CategoryForecast {
6019 category: "operator_usefulness".into(),
6020 current: "moderate".into(),
6021 forecast: "moderate-strong".into(),
6022 justification:
6023 "Lower nuisance with preserved recall is a partial operator-facing improvement."
6024 .into(),
6025 },
6026 CategoryForecast {
6027 category: "sbir_readiness".into(),
6028 current: "moderate".into(),
6029 forecast: "moderate-strong".into(),
6030 justification:
6031 "Primary success advances readiness even if secondary improvements are incomplete."
6032 .into(),
6033 },
6034 CategoryForecast {
6035 category: "licensing_readiness".into(),
6036 current: "moderate".into(),
6037 forecast: "moderate".into(),
6038 justification:
6039 "Without stronger secondary metrics the licensing case improves only modestly."
6040 .into(),
6041 },
6042 CategoryForecast {
6043 category: "paper_readiness".into(),
6044 current: "moderate".into(),
6045 forecast: "moderate-strong".into(),
6046 justification:
6047 "A bounded success claim remains paper-relevant even without stronger lead-time gains."
6048 .into(),
6049 },
6050 ]
6051 } else {
6052 vec![
6053 CategoryForecast {
6054 category: "empirical_rigor".into(),
6055 current: "strong".into(),
6056 forecast: "strong".into(),
6057 justification:
6058 "The package remains rigorous even when cohort DSA does not clear the forecast target."
6059 .into(),
6060 },
6061 CategoryForecast {
6062 category: "operator_usefulness".into(),
6063 current: "moderate".into(),
6064 forecast: "moderate".into(),
6065 justification:
6066 "No measured cohort configuration achieved the target nuisance/recall trade-off."
6067 .into(),
6068 },
6069 CategoryForecast {
6070 category: "sbir_readiness".into(),
6071 current: "moderate".into(),
6072 forecast: "moderate".into(),
6073 justification: "Without a concrete DSA win, readiness does not materially change."
6074 .into(),
6075 },
6076 CategoryForecast {
6077 category: "licensing_readiness".into(),
6078 current: "moderate".into(),
6079 forecast: "moderate".into(),
6080 justification: "No measured licensing-relevant delta was demonstrated.".into(),
6081 },
6082 CategoryForecast {
6083 category: "paper_readiness".into(),
6084 current: "moderate".into(),
6085 forecast: "moderate".into(),
6086 justification:
6087 "The negative result remains publishable, but it does not support a stronger forecast."
6088 .into(),
6089 },
6090 ]
6091 }
6092}
6093
6094fn cohort_member(row: &FeatureRankingRow, cohort_name: &str) -> CohortMember {
6095 CohortMember {
6096 feature_index: row.feature_index,
6097 feature_name: row.feature_name.clone(),
6098 ranking_score: row.candidate_score,
6099 dsfb_boundary_points: row.dsfb_raw_boundary_points,
6100 dsfb_violation_points: row.dsfb_raw_violation_points,
6101 ewma_alarm_points: row.ewma_alarm_points,
6102 threshold_alarm_points: row.threshold_alarm_points,
6103 missing_fraction: row.missing_fraction,
6104 reason_for_inclusion: format!(
6105 "Included in {} at rank {} because score {:.4} = z_boundary({:+.4}) - z_violation({:+.4}) + z_ewma({:+.4}) - penalty({:.1}).",
6106 cohort_name,
6107 row.rank,
6108 row.candidate_score,
6109 row.z_boundary,
6110 row.z_violation,
6111 row.z_ewma,
6112 row.missingness_penalty,
6113 ),
6114 }
6115}
6116
6117fn seed_membership_note(row: &FeatureRankingRow, cutoff: usize, cohort_name: &str) -> String {
6118 if row.rank <= cutoff {
6119 format!(
6120 "Included in {} at rank {} with score {:.4}.",
6121 cohort_name, row.rank, row.candidate_score
6122 )
6123 } else {
6124 format!(
6125 "Excluded from {} because rank {} is outside the cutoff. Score {:.4} = z_boundary({:+.4}) - z_violation({:+.4}) + z_ewma({:+.4}) - penalty({:.1}).",
6126 cohort_name,
6127 row.rank,
6128 row.candidate_score,
6129 row.z_boundary,
6130 row.z_violation,
6131 row.z_ewma,
6132 row.missingness_penalty,
6133 )
6134 }
6135}
6136
6137fn cohort_members<'a>(cohorts: &'a FeatureCohorts, cohort_name: &str) -> &'a [CohortMember] {
6138 match cohort_name {
6139 "top_4" => cohorts.top_4.as_slice(),
6140 "top_8" => cohorts.top_8.as_slice(),
6141 "top_16" => cohorts.top_16.as_slice(),
6142 "all_features" => cohorts.all_features.as_slice(),
6143 _ => &[],
6144 }
6145}
6146
/// Mean of the per-item ratio `numerator(item) / denominator(item)`.
///
/// A denominator of zero is replaced by one, so an item with no denominator
/// count contributes its raw numerator. An empty slice yields `0.0`.
fn average_ratio<T, FNum, FDen>(items: &[T], numerator: FNum, denominator: FDen) -> f64
where
    FNum: Fn(&T) -> usize,
    FDen: Fn(&T) -> usize,
{
    match items.len() {
        0 => 0.0,
        count => {
            let ratio_total = items
                .iter()
                .map(|item| {
                    let top = numerator(item) as f64;
                    let bottom = denominator(item).max(1) as f64;
                    top / bottom
                })
                .sum::<f64>();
            ratio_total / count as f64
        }
    }
}
6161
6162fn primary_success_reason(
6163 failure_recall: usize,
6164 threshold_recall: usize,
6165 nuisance: f64,
6166 ewma_nuisance: f64,
6167) -> String {
6168 let nuisance_ok = nuisance < ewma_nuisance;
6169 let recall_ok = failure_recall + RECALL_TOLERANCE >= threshold_recall;
6170 if nuisance_ok && recall_ok {
6171 format!(
6172 "Success: nuisance {:.4} < EWMA {:.4} and recall {} >= threshold {} - {}.",
6173 nuisance, ewma_nuisance, failure_recall, threshold_recall, RECALL_TOLERANCE
6174 )
6175 } else {
6176 let mut parts = Vec::new();
6177 if !nuisance_ok {
6178 parts.push(format!(
6179 "nuisance {:.4} >= EWMA {:.4}",
6180 nuisance, ewma_nuisance
6181 ));
6182 }
6183 if !recall_ok {
6184 parts.push(format!(
6185 "recall {} < threshold {} - {}",
6186 failure_recall, threshold_recall, RECALL_TOLERANCE
6187 ));
6188 }
6189 parts.join("; ")
6190 }
6191}
6192
6193fn row_grid_point(row: &CohortGridResult) -> String {
6194 format!(
6195 "W={}, K={}, tau={:.1}, m={}",
6196 row.window, row.persistence_runs, row.alert_tau, row.corroborating_m
6197 )
6198}
6199
6200fn row_label(row: &CohortGridResult) -> String {
6201 format!(
6202 "{} [{}] ({})",
6203 row.cohort_name,
6204 row.ranking_strategy,
6205 row_grid_point(row)
6206 )
6207}
6208
/// Returns the five numbered optimisation priorities, highest priority
/// first, as display strings.
fn optimization_priority_order() -> Vec<String> {
    let priorities = [
        "1. Maximize delta_nuisance_vs_ewma",
        "2. Preserve or improve recall toward 103/104 and ideally 104/104",
        "3. Maximize precursor quality",
        "4. Preserve or improve mean lead time vs EWMA/threshold",
        "5. Maintain or improve compression ratio without sacrificing recall badly",
    ];
    priorities.iter().map(|line| line.to_string()).collect()
}
6218
6219fn predeclared_primary_target() -> String {
6220 format!(
6221 "delta_nuisance_vs_ewma >= {:.2} AND DSA recall >= 103/104, where delta_nuisance_vs_ewma = (EWMA_nuisance - DSA_nuisance) / EWMA_nuisance",
6222 PRIMARY_DELTA_TARGET
6223 )
6224}
6225
6226fn predeclared_secondary_target() -> String {
6227 format!(
6228 "delta_nuisance_vs_current_dsa >= {:.2} AND DSA recall >= 100/104, where delta_nuisance_vs_current_dsa = (current_policy_dsa_nuisance - optimized_dsa_nuisance) / current_policy_dsa_nuisance",
6229 SECONDARY_DELTA_TARGET
6230 )
6231}
6232
6233fn primary_success_condition() -> String {
6234 format!(
6235 "pass-run nuisance < EWMA nuisance AND failure recall >= threshold recall - {} run(s)",
6236 RECALL_TOLERANCE
6237 )
6238}
6239
6240fn rating_primary_success_condition() -> String {
6241 format!(
6242 "DSA pass-run nuisance < EWMA pass-run nuisance AND DSA failure recall >= threshold failure recall - {} run(s)",
6243 RECALL_TOLERANCE
6244 )
6245}
6246
6247fn overall_persistence_suppression_fraction(dsa: &DsaEvaluation) -> Option<f64> {
6248 let active_points = dsa
6249 .traces
6250 .iter()
6251 .map(|trace| trace.dsa_active.iter().filter(|flag| **flag).count())
6252 .sum::<usize>();
6253 let alert_points = dsa
6254 .traces
6255 .iter()
6256 .map(|trace| trace.dsa_alert.iter().filter(|flag| **flag).count())
6257 .sum::<usize>();
6258 if active_points == 0 {
6259 None
6260 } else {
6261 Some(1.0 - alert_points as f64 / active_points as f64)
6262 }
6263}
6264
/// Returns `Some(left > right)` when both values are present and `None` when
/// either is missing.
fn compare_option_gt(left: Option<f64>, right: Option<f64>) -> Option<bool> {
    match (left, right) {
        (Some(lhs), Some(rhs)) => Some(lhs > rhs),
        _ => None,
    }
}
6268
/// Formats an optional number with six decimals; a missing value becomes an
/// empty string.
fn format_option_csv(value: Option<f64>) -> String {
    match value {
        Some(number) => format!("{number:.6}"),
        None => String::new(),
    }
}
6272
/// Formats an optional number with four decimals; a missing value becomes
/// `"n/a"`.
fn format_option_f64(value: Option<f64>) -> String {
    match value {
        Some(number) => format!("{number:.4}"),
        None => String::from("n/a"),
    }
}
6278
/// True only when both values are present and `left >= right`; a missing
/// value on either side yields `false`.
fn paired_ge(left: Option<f64>, right: Option<f64>) -> bool {
    match (left, right) {
        (Some(lhs), Some(rhs)) => lhs >= rhs,
        _ => false,
    }
}
6282
/// Orders two optional numbers.
///
/// Two present values are compared numerically, with an unorderable pair
/// (a NaN is involved) treated as equal. A present value ranks above a
/// missing one, and two missing values are equal.
fn compare_option_f64(left: Option<f64>, right: Option<f64>) -> Ordering {
    if let (Some(lhs), Some(rhs)) = (left, right) {
        return lhs.partial_cmp(&rhs).unwrap_or(Ordering::Equal);
    }
    // At least one side is missing: presence alone decides (`false < true`).
    left.is_some().cmp(&right.is_some())
}
6291
/// Returns `(mean, standard deviation)` of `values`.
///
/// The deviation is the population form (divides by the number of values).
/// It is replaced by `1.0` whenever it is not greater than `f64::EPSILON`,
/// so the result is always safe to divide by. An empty slice yields
/// `(0.0, 1.0)`.
fn mean_std(values: &[f64]) -> (f64, f64) {
    if values.is_empty() {
        return (0.0, 1.0);
    }

    let count = values.len() as f64;
    let mean = values.iter().sum::<f64>() / count;
    let squared_deviation_total = values
        .iter()
        .map(|value| (value - mean).powi(2))
        .sum::<f64>();
    let spread = (squared_deviation_total / count).sqrt();

    // Guard against a (near-)zero deviation for constant inputs.
    let std = if spread > f64::EPSILON { spread } else { 1.0 };
    (mean, std)
}
6305
/// Standardizes `value` as `(value - mean) / std`.
///
/// No guard is applied to `std`; the result follows IEEE-754 division rules.
fn z_score(value: f64, mean: f64, std: f64) -> f64 {
    let centered = value - mean;
    centered / std
}
6309
6310#[cfg(test)]
6311mod tests {
6312 use super::*;
6313 use crate::metrics::{
6314 BenchmarkMetrics, BenchmarkSummary, BoundaryEpisodeSummary, DensitySummary, LeadTimeSummary,
6315 };
6316 use crate::preprocessing::DatasetSummary;
6317
6318 fn sample_ranking() -> Vec<FeatureRankingRow> {
6319 vec![
6320 FeatureRankingRow {
6321 ranking_strategy: "compression_biased".into(),
6322 ranking_formula: RANKING_FORMULA.into(),
6323 feature_index: 58,
6324 feature_name: "S059".into(),
6325 dsfb_raw_boundary_points: 682,
6326 dsfb_persistent_boundary_points: 650,
6327 dsfb_raw_violation_points: 31,
6328 dsfb_persistent_violation_points: 4,
6329 ewma_alarm_points: 624,
6330 threshold_alarm_points: 31,
6331 pre_failure_run_hits: 20,
6332 motif_precision_proxy: Some(0.6),
6333 recall_rescue_contribution: None,
6334 operator_burden_contribution: None,
6335 semantic_persistence_contribution: None,
6336 grouped_semantic_support: None,
6337 violation_overdominance_penalty: None,
6338 missing_fraction: 0.0025,
6339 z_pre_failure_run_hits: None,
6340 z_motif_precision_proxy: None,
6341 z_recall_rescue_contribution: None,
6342 z_operator_burden_contribution: None,
6343 z_semantic_persistence_contribution: None,
6344 z_grouped_semantic_support: None,
6345 z_violation_overdominance_penalty: None,
6346 z_boundary: 5.0,
6347 z_violation: -0.1,
6348 z_ewma: 3.0,
6349 missingness_penalty: 0.0,
6350 candidate_score: 8.1,
6351 score_breakdown: "".into(),
6352 rank: 1,
6353 },
6354 FeatureRankingRow {
6355 ranking_strategy: "compression_biased".into(),
6356 ranking_formula: RANKING_FORMULA.into(),
6357 feature_index: 43,
6358 feature_name: "S044".into(),
6359 dsfb_raw_boundary_points: 400,
6360 dsfb_persistent_boundary_points: 380,
6361 dsfb_raw_violation_points: 18,
6362 dsfb_persistent_violation_points: 2,
6363 ewma_alarm_points: 210,
6364 threshold_alarm_points: 18,
6365 pre_failure_run_hits: 14,
6366 motif_precision_proxy: Some(0.5),
6367 recall_rescue_contribution: None,
6368 operator_burden_contribution: None,
6369 semantic_persistence_contribution: None,
6370 grouped_semantic_support: None,
6371 violation_overdominance_penalty: None,
6372 missing_fraction: 0.01,
6373 z_pre_failure_run_hits: None,
6374 z_motif_precision_proxy: None,
6375 z_recall_rescue_contribution: None,
6376 z_operator_burden_contribution: None,
6377 z_semantic_persistence_contribution: None,
6378 z_grouped_semantic_support: None,
6379 z_violation_overdominance_penalty: None,
6380 z_boundary: 1.2,
6381 z_violation: -0.5,
6382 z_ewma: 0.9,
6383 missingness_penalty: 0.0,
6384 candidate_score: 2.6,
6385 score_breakdown: "".into(),
6386 rank: 2,
6387 },
6388 FeatureRankingRow {
6389 ranking_strategy: "compression_biased".into(),
6390 ranking_formula: RANKING_FORMULA.into(),
6391 feature_index: 60,
6392 feature_name: "S061".into(),
6393 dsfb_raw_boundary_points: 340,
6394 dsfb_persistent_boundary_points: 320,
6395 dsfb_raw_violation_points: 18,
6396 dsfb_persistent_violation_points: 1,
6397 ewma_alarm_points: 190,
6398 threshold_alarm_points: 18,
6399 pre_failure_run_hits: 12,
6400 motif_precision_proxy: Some(0.45),
6401 recall_rescue_contribution: None,
6402 operator_burden_contribution: None,
6403 semantic_persistence_contribution: None,
6404 grouped_semantic_support: None,
6405 violation_overdominance_penalty: None,
6406 missing_fraction: 0.01,
6407 z_pre_failure_run_hits: None,
6408 z_motif_precision_proxy: None,
6409 z_recall_rescue_contribution: None,
6410 z_operator_burden_contribution: None,
6411 z_semantic_persistence_contribution: None,
6412 z_grouped_semantic_support: None,
6413 z_violation_overdominance_penalty: None,
6414 z_boundary: 1.0,
6415 z_violation: -0.5,
6416 z_ewma: 0.8,
6417 missingness_penalty: 0.0,
6418 candidate_score: 2.3,
6419 score_breakdown: "".into(),
6420 rank: 3,
6421 },
6422 FeatureRankingRow {
6423 ranking_strategy: "compression_biased".into(),
6424 ranking_formula: RANKING_FORMULA.into(),
6425 feature_index: 221,
6426 feature_name: "S222".into(),
6427 dsfb_raw_boundary_points: 341,
6428 dsfb_persistent_boundary_points: 300,
6429 dsfb_raw_violation_points: 7,
6430 dsfb_persistent_violation_points: 0,
6431 ewma_alarm_points: 160,
6432 threshold_alarm_points: 7,
6433 pre_failure_run_hits: 11,
6434 motif_precision_proxy: Some(0.55),
6435 recall_rescue_contribution: None,
6436 operator_burden_contribution: None,
6437 semantic_persistence_contribution: None,
6438 grouped_semantic_support: None,
6439 violation_overdominance_penalty: None,
6440 missing_fraction: 0.02,
6441 z_pre_failure_run_hits: None,
6442 z_motif_precision_proxy: None,
6443 z_recall_rescue_contribution: None,
6444 z_operator_burden_contribution: None,
6445 z_semantic_persistence_contribution: None,
6446 z_grouped_semantic_support: None,
6447 z_violation_overdominance_penalty: None,
6448 z_boundary: 1.1,
6449 z_violation: -0.8,
6450 z_ewma: 0.6,
6451 missingness_penalty: 0.0,
6452 candidate_score: 2.5,
6453 score_breakdown: "".into(),
6454 rank: 4,
6455 },
6456 ]
6457 }
6458
6459 fn sample_metrics_for_delta_target() -> BenchmarkMetrics {
6460 BenchmarkMetrics {
6461 summary: BenchmarkSummary {
6462 dataset_summary: DatasetSummary {
6463 run_count: 10,
6464 feature_count: 3,
6465 pass_count: 8,
6466 fail_count: 2,
6467 dataset_missing_fraction: 0.0,
6468 healthy_pass_runs_requested: 3,
6469 healthy_pass_runs_found: 3,
6470 },
6471 analyzable_feature_count: 3,
6472 grammar_imputation_suppression_points: 0,
6473 threshold_alarm_points: 0,
6474 ewma_alarm_points: 0,
6475 cusum_alarm_points: 0,
6476 run_energy_alarm_points: 0,
6477 pca_fdc_alarm_points: 0,
6478 dsfb_raw_boundary_points: 0,
6479 dsfb_persistent_boundary_points: 0,
6480 dsfb_raw_violation_points: 0,
6481 dsfb_persistent_violation_points: 0,
6482 failure_runs: 104,
6483 failure_runs_with_preceding_dsfb_raw_signal: 0,
6484 failure_runs_with_preceding_dsfb_persistent_signal: 0,
6485 failure_runs_with_preceding_dsfb_raw_boundary_signal: 0,
6486 failure_runs_with_preceding_dsfb_persistent_boundary_signal: 0,
6487 failure_runs_with_preceding_dsfb_raw_violation_signal: 0,
6488 failure_runs_with_preceding_dsfb_persistent_violation_signal: 0,
6489 failure_runs_with_preceding_ewma_signal: 104,
6490 failure_runs_with_preceding_cusum_signal: 104,
6491 failure_runs_with_preceding_run_energy_signal: 0,
6492 failure_runs_with_preceding_pca_fdc_signal: 103,
6493 failure_runs_with_preceding_threshold_signal: 104,
6494 pass_runs: 731,
6495 pass_runs_with_dsfb_raw_boundary_signal: 0,
6496 pass_runs_with_dsfb_persistent_boundary_signal: 0,
6497 pass_runs_with_dsfb_raw_violation_signal: 0,
6498 pass_runs_with_dsfb_persistent_violation_signal: 0,
6499 pass_runs_with_ewma_signal: 0,
6500 pass_runs_with_cusum_signal: 0,
6501 pass_runs_with_run_energy_signal: 0,
6502 pass_runs_with_pca_fdc_signal: 0,
6503 pass_runs_with_threshold_signal: 0,
6504 pass_run_dsfb_raw_boundary_nuisance_rate: 0.9986329460,
6505 pass_run_dsfb_persistent_boundary_nuisance_rate: 0.9904,
6506 pass_run_dsfb_raw_violation_nuisance_rate: 0.9740259740,
6507 pass_run_dsfb_persistent_violation_nuisance_rate: 0.7724,
6508 pass_run_ewma_nuisance_rate: 0.9863294600136705,
6509 pass_run_cusum_nuisance_rate: 1.0,
6510 pass_run_run_energy_nuisance_rate: 0.5263,
6511 pass_run_pca_fdc_nuisance_rate: 0.9316,
6512 pass_run_threshold_nuisance_rate: 0.974025974025974,
6513 },
6514 lead_time_summary: LeadTimeSummary {
6515 failure_runs_with_raw_boundary_lead: 103,
6516 failure_runs_with_persistent_boundary_lead: 103,
6517 failure_runs_with_raw_violation_lead: 104,
6518 failure_runs_with_persistent_violation_lead: 104,
6519 failure_runs_with_threshold_lead: 104,
6520 failure_runs_with_ewma_lead: 104,
6521 failure_runs_with_cusum_lead: 104,
6522 failure_runs_with_run_energy_lead: 0,
6523 failure_runs_with_pca_fdc_lead: 103,
6524 mean_raw_boundary_lead_runs: Some(19.67),
6525 mean_persistent_boundary_lead_runs: Some(19.54),
6526 mean_raw_violation_lead_runs: Some(19.56),
6527 mean_persistent_violation_lead_runs: Some(18.0),
6528 mean_threshold_lead_runs: Some(19.557692307692307),
6529 mean_ewma_lead_runs: Some(19.576923076923077),
6530 mean_cusum_lead_runs: Some(19.58653846153846),
6531 mean_run_energy_lead_runs: Some(16.31),
6532 mean_pca_fdc_lead_runs: Some(19.009708737864077),
6533 mean_raw_boundary_minus_cusum_delta_runs: None,
6534 mean_raw_boundary_minus_run_energy_delta_runs: None,
6535 mean_raw_boundary_minus_pca_fdc_delta_runs: None,
6536 mean_raw_boundary_minus_threshold_delta_runs: None,
6537 mean_raw_boundary_minus_ewma_delta_runs: None,
6538 mean_persistent_boundary_minus_cusum_delta_runs: None,
6539 mean_persistent_boundary_minus_run_energy_delta_runs: None,
6540 mean_persistent_boundary_minus_pca_fdc_delta_runs: None,
6541 mean_persistent_boundary_minus_threshold_delta_runs: None,
6542 mean_persistent_boundary_minus_ewma_delta_runs: None,
6543 mean_raw_violation_minus_cusum_delta_runs: None,
6544 mean_raw_violation_minus_run_energy_delta_runs: None,
6545 mean_raw_violation_minus_pca_fdc_delta_runs: None,
6546 mean_raw_violation_minus_threshold_delta_runs: None,
6547 mean_raw_violation_minus_ewma_delta_runs: None,
6548 mean_persistent_violation_minus_cusum_delta_runs: None,
6549 mean_persistent_violation_minus_run_energy_delta_runs: None,
6550 mean_persistent_violation_minus_pca_fdc_delta_runs: None,
6551 mean_persistent_violation_minus_threshold_delta_runs: None,
6552 mean_persistent_violation_minus_ewma_delta_runs: None,
6553 },
6554 density_summary: DensitySummary {
6555 density_window: 5,
6556 mean_raw_boundary_density_failure: 0.0,
6557 mean_raw_boundary_density_pass: 0.0,
6558 mean_persistent_boundary_density_failure: 0.0,
6559 mean_persistent_boundary_density_pass: 0.0,
6560 mean_raw_violation_density_failure: 0.0,
6561 mean_raw_violation_density_pass: 0.0,
6562 mean_persistent_violation_density_failure: 0.0,
6563 mean_persistent_violation_density_pass: 0.0,
6564 mean_threshold_density_failure: 0.0,
6565 mean_threshold_density_pass: 0.0,
6566 mean_ewma_density_failure: 0.0,
6567 mean_ewma_density_pass: 0.0,
6568 mean_cusum_density_failure: 0.0,
6569 mean_cusum_density_pass: 0.0,
6570 },
6571 boundary_episode_summary: BoundaryEpisodeSummary {
6572 raw_episode_count: 28607,
6573 persistent_episode_count: 0,
6574 mean_raw_episode_length: None,
6575 mean_persistent_episode_length: None,
6576 max_raw_episode_length: 0,
6577 max_persistent_episode_length: 0,
6578 raw_non_escalating_episode_fraction: None,
6579 persistent_non_escalating_episode_fraction: None,
6580 },
6581 dsa_summary: None,
6582 motif_metrics: Vec::new(),
6583 per_failure_run_signals: Vec::new(),
6584 density_metrics: Vec::new(),
6585 feature_metrics: Vec::new(),
6586 top_feature_indices: Vec::new(),
6587 }
6588 }
6589
// Building cohorts twice from equal rankings must serialize to identical
// JSON, yield a four-entry `top_4`, and report seed feature S059.
#[test]
fn cohort_selection_is_deterministic() {
    let first = build_feature_cohorts(&sample_ranking());
    let second = build_feature_cohorts(&sample_ranking());

    let first_json = serde_json::to_value(&first).unwrap();
    let second_json = serde_json::to_value(&second).unwrap();
    assert_eq!(first_json, second_json);

    assert_eq!(first.top_4.len(), 4);

    let reports_s059 = first
        .seed_feature_report
        .iter()
        .any(|seed| seed.feature_name == "S059");
    assert!(reports_s059);
}
6604
// The seed-feature check built from the sample cohorts lists six requested
// seed features, and its first report entry is S059 flagged as in the top 4.
#[test]
fn seed_feature_check_artifact_is_emitted_deterministically() {
    let ranking = sample_ranking();
    let cohorts = build_feature_cohorts(&ranking);
    let artifact = build_seed_feature_check(&cohorts);

    assert_eq!(artifact.requested_seed_features.len(), 6);

    let leading_seed = &artifact.seed_feature_report[0];
    assert_eq!(leading_seed.feature_name, "S059");
    assert!(leading_seed.in_top_4);
}
6613
// Writes a single grid row through `write_precursor_quality_csv` into a
// temporary directory, reads the file back, and checks that both the leading
// header columns and the formatted data prefix appear in the output.
#[test]
fn precursor_quality_csv_format_is_stable() {
    let grid_row = CohortGridResult {
        ranking_strategy: "compression_biased".into(),
        ranking_formula: RANKING_FORMULA.into(),
        grid_row_id: 1,
        feature_trace_config_id: 0,
        cohort_name: "top_4".into(),
        cohort_size: 4,
        window: 5,
        persistence_runs: 2,
        alert_tau: 2.0,
        corroborating_m: 2,
        primary_run_signal: "signal".into(),
        failure_recall: 10,
        failure_runs: 12,
        failure_recall_rate: 0.8333,
        threshold_recall: 11,
        ewma_recall: 11,
        failure_recall_delta_vs_threshold: -1,
        failure_recall_delta_vs_ewma: -1,
        mean_lead_time_runs: Some(3.0),
        median_lead_time_runs: Some(3.0),
        threshold_mean_lead_time_runs: Some(2.0),
        ewma_mean_lead_time_runs: Some(2.0),
        mean_lead_delta_vs_threshold_runs: Some(1.0),
        mean_lead_delta_vs_ewma_runs: Some(1.0),
        pass_run_nuisance_proxy: 0.1,
        numeric_pass_run_nuisance_proxy: 0.15,
        ewma_nuisance: 0.2,
        threshold_nuisance: 0.3,
        pass_run_nuisance_delta_vs_ewma: -0.1,
        pass_run_nuisance_delta_vs_threshold: -0.2,
        pass_run_nuisance_delta_vs_numeric_dsa: -0.05,
        raw_boundary_episode_count: 20,
        dsa_episode_count: 4,
        dsa_episodes_preceding_failure: 3,
        mean_dsa_episode_length_runs: Some(2.0),
        max_dsa_episode_length_runs: 5,
        compression_ratio: Some(5.0),
        precursor_quality: Some(0.75),
        non_escalating_dsa_episode_fraction: Some(0.25),
        feature_level_active_points: 8,
        feature_level_alert_points: 4,
        persistence_suppression_fraction: Some(0.5),
        numeric_failure_recall: 11,
        policy_vs_numeric_recall_delta: -1,
        watch_point_count: 3,
        review_point_count: 3,
        escalate_point_count: 1,
        investigation_point_count: 4,
        numeric_investigation_point_count: 6,
        silenced_point_count: 2,
        rescued_point_count: 1,
        rescued_watch_to_review_points: 1,
        rescued_review_to_escalate_points: 0,
        review_escalate_points_per_pass_run: 0.2,
        numeric_alert_points_per_pass_run: 0.3,
        review_escalate_episodes_per_pass_run: 0.1,
        numeric_alert_episodes_per_pass_run: 0.15,
        primary_success: true,
        primary_success_reason: "ok".into(),
    };

    // The directory guard must outlive the read below.
    let scratch_dir = tempfile::tempdir().unwrap();
    let csv_path = scratch_dir.path().join("precursor_quality.csv");
    write_precursor_quality_csv(&csv_path, std::slice::from_ref(&grid_row)).unwrap();

    let written = std::fs::read_to_string(&csv_path).unwrap();
    let header_fragment = "cohort_name,window,persistence_runs,alert_tau";
    let data_fragment = "top_4,5,2,2.000000,2,20,4,3,0.750000,5.000000";
    assert!(written.contains(header_fragment));
    assert!(written.contains(data_fragment));
}
6684
// Assesses an optimized row (recall 103/104) against a baseline row (recall
// 100/104) and expects none of the primary, ideal or secondary targets to be
// met, while pinning the reported nuisance deltas of the selected
// configuration versus EWMA and versus the current DSA baseline.
#[test]
fn delta_target_assessment_reports_unreached_forty_percent_goal() {
    let optimized_row = CohortGridResult {
        ranking_strategy: "compression_biased".into(),
        ranking_formula: RANKING_FORMULA.into(),
        grid_row_id: 1,
        feature_trace_config_id: 0,
        cohort_name: "all_features".into(),
        cohort_size: 100,
        window: 10,
        persistence_runs: 4,
        alert_tau: 2.0,
        corroborating_m: 1,
        primary_run_signal: "signal".into(),
        failure_recall: 103,
        failure_runs: 104,
        failure_recall_rate: 103.0 / 104.0,
        threshold_recall: 104,
        ewma_recall: 104,
        failure_recall_delta_vs_threshold: -1,
        failure_recall_delta_vs_ewma: -1,
        mean_lead_time_runs: Some(17.980582524271846),
        median_lead_time_runs: Some(20.0),
        threshold_mean_lead_time_runs: Some(19.557692307692307),
        ewma_mean_lead_time_runs: Some(19.576923076923077),
        mean_lead_delta_vs_threshold_runs: Some(-1.7475728155339805),
        mean_lead_delta_vs_ewma_runs: Some(-1.766990291262136),
        pass_run_nuisance_proxy: 0.7997265892002734,
        numeric_pass_run_nuisance_proxy: 0.9180,
        ewma_nuisance: 0.9863294600136705,
        threshold_nuisance: 0.974025974025974,
        pass_run_nuisance_delta_vs_ewma: -0.1866028708133971,
        pass_run_nuisance_delta_vs_threshold: -0.17429938482570062,
        pass_run_nuisance_delta_vs_numeric_dsa: -0.11827341079972659,
        raw_boundary_episode_count: 28607,
        dsa_episode_count: 73,
        dsa_episodes_preceding_failure: 57,
        mean_dsa_episode_length_runs: Some(17.041095890410958),
        max_dsa_episode_length_runs: 110,
        compression_ratio: Some(391.8767123287671),
        precursor_quality: Some(0.7808219178082192),
        non_escalating_dsa_episode_fraction: Some(0.0),
        feature_level_active_points: 0,
        feature_level_alert_points: 0,
        persistence_suppression_fraction: None,
        numeric_failure_recall: 99,
        policy_vs_numeric_recall_delta: 4,
        watch_point_count: 0,
        review_point_count: 0,
        escalate_point_count: 0,
        investigation_point_count: 3892,
        numeric_investigation_point_count: 8014,
        silenced_point_count: 0,
        rescued_point_count: 57,
        rescued_watch_to_review_points: 57,
        rescued_review_to_escalate_points: 0,
        review_escalate_points_per_pass_run: 2.515379357484621,
        numeric_alert_points_per_pass_run: 5.187286397812714,
        review_escalate_episodes_per_pass_run: 0.08133971291866028,
        numeric_alert_episodes_per_pass_run: 0.05468215994531784,
        primary_success: true,
        primary_success_reason: "selected".into(),
    };

    // The baseline shares every field with the optimized row except the ones
    // overridden here.
    let baseline_row = CohortGridResult {
        grid_row_id: 0,
        persistence_runs: 2,
        failure_recall: 100,
        failure_recall_rate: 100.0 / 104.0,
        failure_recall_delta_vs_threshold: -4,
        failure_recall_delta_vs_ewma: -4,
        mean_lead_time_runs: Some(18.7),
        mean_lead_delta_vs_threshold_runs: Some(-0.8577),
        mean_lead_delta_vs_ewma_runs: Some(-0.8769),
        pass_run_nuisance_proxy: 0.8311688311688312,
        numeric_pass_run_nuisance_proxy: 0.9330,
        pass_run_nuisance_delta_vs_ewma: -0.15516062884483928,
        pass_run_nuisance_delta_vs_threshold: -0.1428571428571428,
        pass_run_nuisance_delta_vs_numeric_dsa: -0.10183116883116884,
        dsa_episode_count: 65,
        dsa_episodes_preceding_failure: 52,
        mean_dsa_episode_length_runs: Some(17.0),
        compression_ratio: Some(440.10769230769233),
        precursor_quality: Some(0.8),
        policy_vs_numeric_recall_delta: 1,
        rescued_point_count: 0,
        rescued_watch_to_review_points: 0,
        primary_success: false,
        primary_success_reason: "baseline".into(),
        ..optimized_row.clone()
    };

    let metrics = sample_metrics_for_delta_target();
    let candidate_rows = std::slice::from_ref(&optimized_row);
    let assessment = compute_delta_target_assessment(
        &optimized_row,
        candidate_rows,
        candidate_rows,
        &baseline_row,
        &metrics,
    );

    assert!(!assessment.primary_target_met);
    assert!(!assessment.ideal_target_met);
    assert!(!assessment.secondary_target_met);

    let selected = &assessment.selected_configuration;
    let ewma_delta_error = (selected.delta_nuisance_vs_ewma - 0.18918918918918917).abs();
    assert!(ewma_delta_error < 1.0e-9);
    let current_dsa_delta_error =
        (selected.delta_nuisance_vs_current_dsa - 0.037828947368421136).abs();
    assert!(current_dsa_delta_error < 1.0e-9);
}
6834
// Offers two candidate rows with identical recall but different nuisance and
// expects `best_recall_103_candidate` to be the selected row (the one with
// the lower pass-run nuisance proxy), with its nuisance delta versus EWMA
// pinned.
#[test]
fn delta_target_assessment_prefers_best_recall_preserving_delta_row() {
    let selected_row = CohortGridResult {
        ranking_strategy: "compression_biased".into(),
        ranking_formula: RANKING_FORMULA.into(),
        grid_row_id: 1,
        feature_trace_config_id: 0,
        cohort_name: "all_features".into(),
        cohort_size: 100,
        window: 10,
        persistence_runs: 4,
        alert_tau: 2.0,
        corroborating_m: 1,
        primary_run_signal: "signal".into(),
        failure_recall: 103,
        failure_runs: 104,
        failure_recall_rate: 103.0 / 104.0,
        threshold_recall: 104,
        ewma_recall: 104,
        failure_recall_delta_vs_threshold: -1,
        failure_recall_delta_vs_ewma: -1,
        mean_lead_time_runs: Some(17.980582524271846),
        median_lead_time_runs: Some(20.0),
        threshold_mean_lead_time_runs: Some(19.557692307692307),
        ewma_mean_lead_time_runs: Some(19.576923076923077),
        mean_lead_delta_vs_threshold_runs: Some(-1.7475728155339805),
        mean_lead_delta_vs_ewma_runs: Some(-1.766990291262136),
        pass_run_nuisance_proxy: 0.7997265892002734,
        numeric_pass_run_nuisance_proxy: 0.9180,
        ewma_nuisance: 0.9863294600136705,
        threshold_nuisance: 0.974025974025974,
        pass_run_nuisance_delta_vs_ewma: -0.1866028708133971,
        pass_run_nuisance_delta_vs_threshold: -0.17429938482570062,
        pass_run_nuisance_delta_vs_numeric_dsa: -0.11827341079972659,
        raw_boundary_episode_count: 28607,
        dsa_episode_count: 73,
        dsa_episodes_preceding_failure: 57,
        mean_dsa_episode_length_runs: Some(17.041095890410958),
        max_dsa_episode_length_runs: 110,
        compression_ratio: Some(391.8767123287671),
        precursor_quality: Some(0.7808219178082192),
        non_escalating_dsa_episode_fraction: Some(0.0),
        feature_level_active_points: 0,
        feature_level_alert_points: 0,
        persistence_suppression_fraction: None,
        numeric_failure_recall: 99,
        policy_vs_numeric_recall_delta: 4,
        watch_point_count: 0,
        review_point_count: 0,
        escalate_point_count: 0,
        investigation_point_count: 3892,
        numeric_investigation_point_count: 8014,
        silenced_point_count: 0,
        rescued_point_count: 57,
        rescued_watch_to_review_points: 57,
        rescued_review_to_escalate_points: 0,
        review_escalate_points_per_pass_run: 2.515379357484621,
        numeric_alert_points_per_pass_run: 5.187286397812714,
        review_escalate_episodes_per_pass_run: 0.08133971291866028,
        numeric_alert_episodes_per_pass_run: 0.05468215994531784,
        primary_success: true,
        primary_success_reason: "selected".into(),
    };

    // Baseline: lower recall (100/104); everything not listed is inherited
    // from the selected row.
    let baseline_row = CohortGridResult {
        failure_recall: 100,
        failure_recall_rate: 100.0 / 104.0,
        failure_recall_delta_vs_threshold: -4,
        failure_recall_delta_vs_ewma: -4,
        mean_lead_time_runs: Some(18.7),
        mean_lead_delta_vs_threshold_runs: Some(-0.8577),
        mean_lead_delta_vs_ewma_runs: Some(-0.8769),
        pass_run_nuisance_proxy: 0.8311688311688312,
        numeric_pass_run_nuisance_proxy: 0.9330,
        dsa_episode_count: 65,
        compression_ratio: Some(440.10769230769233),
        precursor_quality: Some(0.8),
        numeric_failure_recall: 99,
        policy_vs_numeric_recall_delta: 1,
        rescued_point_count: 0,
        rescued_watch_to_review_points: 0,
        primary_success: false,
        primary_success_reason: "baseline".into(),
        ..selected_row.clone()
    };

    // Competitor: same recall as the selected row but a higher nuisance proxy.
    let weaker_recall_preserving_row = CohortGridResult {
        ranking_strategy: "recall_aware".into(),
        persistence_runs: 2,
        pass_run_nuisance_proxy: 0.8386876281613124,
        pass_run_nuisance_delta_vs_ewma: -0.14764183185235812,
        pass_run_nuisance_delta_vs_threshold: -0.13533834586466164,
        pass_run_nuisance_delta_vs_numeric_dsa: -0.09432679900680764,
        dsa_episode_count: 67,
        compression_ratio: Some(426.97014925373134),
        precursor_quality: Some(0.8059701492537313),
        ..selected_row.clone()
    };

    let metrics = sample_metrics_for_delta_target();
    let all_candidates = [selected_row.clone(), weaker_recall_preserving_row];
    let assessment = compute_delta_target_assessment(
        &selected_row,
        std::slice::from_ref(&selected_row),
        &all_candidates,
        &baseline_row,
        &metrics,
    );

    let best = assessment
        .best_recall_103_candidate
        .expect("best recall row");
    assert_eq!(best.configuration, row_label(&selected_row));

    let ewma_delta_error = (best.delta_nuisance_vs_ewma - 0.18918918918918917).abs();
    assert!(ewma_delta_error < 1.0e-9);
}
6947}