use crate::baselines::BaselineSet;
use crate::error::Result;
use crate::heuristics::{
FeaturePolicyOverride, HeuristicAlertClass, PERSISTENT_INSTABILITY_CLUSTER,
PRE_FAILURE_SLOW_DRIFT, RECURRENT_BOUNDARY_APPROACH, TRANSITION_EXCURSION,
};
use crate::metrics::BenchmarkMetrics;
use crate::nominal::NominalModel;
use crate::precursor::{
evaluate_dsa, evaluate_dsa_with_policy, project_dsa_to_cohort, DsaConfig, DsaEvaluation,
DsaPolicyRuntime, RecallRescueConfig,
};
use crate::preprocessing::PreparedDataset;
use crate::residual::ResidualSet;
use crate::semiotics::{ScaffoldSemioticsArtifacts, SemanticLayer};
use crate::signs::SignSet;
use crate::{error::DsfbSemiconductorError, grammar::GrammarSet};
use csv::Writer;
use serde::Serialize;
use std::cmp::Ordering;
use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
// Textual descriptions of the scoring formula used by each ranking strategy.
// `compute_feature_ranking` and `compute_feature_ranking_recall_aware` copy the
// first two into `FeatureRankingRow::ranking_formula`; in the code visible here
// they are labels only and are never parsed.
// The `0.50` and `2.0` literals inside these strings duplicate
// MISSINGNESS_PENALTY_THRESHOLD / MISSINGNESS_PENALTY_VALUE below; keep them in
// sync when either constant changes.
const RANKING_FORMULA: &str =
"candidate_score = z(dsfb_raw_boundary_points) - z(dsfb_raw_violation_points) + z(ewma_alarm_points) - I(missing_fraction > 0.50) * 2.0";
const RECALL_AWARE_RANKING_FORMULA: &str =
"candidate_score_recall = z(pre_failure_run_hits) + z(motif_precision_proxy) + z(ewma_alarm_points) + 0.5 * z(dsfb_raw_boundary_points) + 0.5 * z(recall_rescue_contribution) - 0.5 * z(dsfb_raw_violation_points) - I(missing_fraction > 0.50) * 2.0";
const BURDEN_AWARE_RANKING_FORMULA: &str =
"candidate_score_burden = z(pre_failure_run_hits) + z(motif_precision_proxy) + 0.5 * z(dsfb_raw_boundary_points) + 0.5 * z(recall_rescue_contribution) - z(operator_burden_contribution) - 0.5 * z(dsfb_raw_violation_points) - I(missing_fraction > 0.50) * 2.0";
const DSFB_AWARE_RANKING_FORMULA: &str =
"candidate_score_dsfb = z(pre_failure_run_hits) + z(motif_precision_proxy) + 0.5 * z(recall_rescue_contribution) + 0.5 * z(semantic_persistence_contribution) + 0.5 * z(grouped_semantic_support) + 0.25 * z(dsfb_raw_boundary_points) - z(operator_burden_contribution) - 0.5 * z(violation_overdominance_penalty) - I(missing_fraction > 0.50) * 2.0";
// A feature whose `missing_fraction` is strictly greater than the threshold has
// MISSINGNESS_PENALTY_VALUE subtracted from its candidate score.
const MISSINGNESS_PENALTY_THRESHOLD: f64 = 0.50;
const MISSINGNESS_PENALTY_VALUE: f64 = 2.0;
// NOTE(review): the constants from here on are not referenced in the part of
// the file covered by this review; the group labels below are inferred from
// the names only — confirm against the usage sites.
// Recall tolerance (in runs) and nuisance-delta targets.
const RECALL_TOLERANCE: usize = 1;
const PRIMARY_DELTA_TARGET: f64 = 0.40;
const SECONDARY_DELTA_TARGET: f64 = 0.40;
// Candidate values for the corroboration / window / persistence / tau sweeps.
const CORROBORATION_SWEEP: &[usize] = &[1, 2, 3, 5];
const DSA_WINDOW_SWEEP: &[usize] = &[5, 10, 15];
const DSA_PERSISTENCE_SWEEP: &[usize] = &[2, 3, 4];
const DSA_TAU_SWEEP: &[f64] = &[2.0, 2.5, 3.0];
// Fixed scores presumably reported by the rating forecast (see `RatingDeltaForecast`).
const CURRENT_BASELINE_SCORE: f64 = 8.1;
const FORECAST_PRIMARY_ONLY: f64 = 8.8;
const FORECAST_PRIMARY_PLUS_SECONDARY: f64 = 9.1;
const FORECAST_RECALL_SHORTFALL_VALUE: f64 = 8.3;
// Feature names presumably looked up in the rankings (see `SeedFeatureReport`).
const SEED_FEATURES: &[&str] = &["S059", "S044", "S061", "S222", "S354", "S173"];
// Parameters presumably used when deriving rescue / override policies during optimization.
const OPTIMIZATION_RESCUE_WINDOW: usize = 5;
const OPTIMIZATION_RESCUE_MIN_HITS: usize = 4;
const OPTIMIZATION_RESCUE_FRAGMENTATION: f64 = 0.5;
const OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS: f64 = 0.05;
const OPERATOR_DELTA_THRESHOLD: f64 = 0.40;
const MAX_FAILURE_DRIVEN_NUISANCE_OVERRIDES: usize = 5;
const MAX_FAILURE_DRIVEN_ISOLATED_NUISANCE_OVERRIDES: usize = 3;
/// One feature's entry in a ranking table for a single ranking strategy.
///
/// Carries the raw per-feature counts, the z-scored terms, the missingness
/// penalty, and the resulting `candidate_score`. The `Option` fields hold
/// terms that only some strategies use: the constructors visible in this file
/// set them to `None` whenever the strategy's formula does not include them.
#[derive(Debug, Clone, Serialize)]
pub struct FeatureRankingRow {
/// Strategy label, e.g. `"compression_biased"` or `"recall_aware"`.
pub ranking_strategy: String,
/// Human-readable formula text for the strategy (one of the `*_RANKING_FORMULA` constants).
pub ranking_formula: String,
pub feature_index: usize,
pub feature_name: String,
pub dsfb_raw_boundary_points: usize,
pub dsfb_persistent_boundary_points: usize,
pub dsfb_raw_violation_points: usize,
pub dsfb_persistent_violation_points: usize,
pub ewma_alarm_points: usize,
pub threshold_alarm_points: usize,
pub pre_failure_run_hits: usize,
/// Copied as-is from the feature metrics; the recall-aware ranking treats `None` as 0.0 when z-scoring.
pub motif_precision_proxy: Option<f64>,
pub recall_rescue_contribution: Option<f64>,
pub operator_burden_contribution: Option<f64>,
pub semantic_persistence_contribution: Option<f64>,
pub grouped_semantic_support: Option<f64>,
pub violation_overdominance_penalty: Option<f64>,
pub missing_fraction: f64,
pub z_pre_failure_run_hits: Option<f64>,
pub z_motif_precision_proxy: Option<f64>,
pub z_recall_rescue_contribution: Option<f64>,
pub z_operator_burden_contribution: Option<f64>,
pub z_semantic_persistence_contribution: Option<f64>,
pub z_grouped_semantic_support: Option<f64>,
pub z_violation_overdominance_penalty: Option<f64>,
/// z-score of `dsfb_raw_boundary_points` across the analyzable features.
pub z_boundary: f64,
/// z-score of `dsfb_raw_violation_points` across the analyzable features.
pub z_violation: f64,
/// z-score of `ewma_alarm_points` across the analyzable features.
pub z_ewma: f64,
/// `MISSINGNESS_PENALTY_VALUE` when `missing_fraction` exceeds the threshold, otherwise 0.0.
pub missingness_penalty: f64,
pub candidate_score: f64,
/// Formatted, term-by-term rendering of how `candidate_score` was assembled.
pub score_breakdown: String,
/// 1-based position after sorting by descending `candidate_score`.
pub rank: usize,
}
/// Side-by-side view of one feature's rank and score under the compression,
/// recall-aware, burden-aware, and DSFB-aware rankings, plus rank deltas
/// measured against the compression ranking.
///
/// NOTE(review): the `Option` fields are presumably `None` when the feature is
/// absent from the corresponding ranking — confirm against the code that builds
/// these rows (not visible here).
#[derive(Debug, Clone, Serialize)]
pub struct FeatureRankingComparisonRow {
pub feature_index: usize,
pub feature_name: String,
pub compression_rank: Option<usize>,
pub recall_aware_rank: Option<usize>,
pub burden_aware_rank: Option<usize>,
pub dsfb_aware_rank: Option<usize>,
pub compression_score: Option<f64>,
pub recall_aware_score: Option<f64>,
pub burden_aware_score: Option<f64>,
pub dsfb_aware_score: Option<f64>,
pub rank_delta_recall_minus_compression: Option<i64>,
pub rank_delta_burden_minus_compression: Option<i64>,
pub rank_delta_dsfb_aware_minus_compression: Option<i64>,
}
/// A feature listed in a cohort, with its ranking score, alarm/boundary counts,
/// missing fraction, and a free-text reason for its inclusion.
#[derive(Debug, Clone, Serialize)]
pub struct CohortMember {
pub feature_index: usize,
pub feature_name: String,
pub ranking_score: f64,
pub dsfb_boundary_points: usize,
pub dsfb_violation_points: usize,
pub ewma_alarm_points: usize,
pub threshold_alarm_points: usize,
pub missing_fraction: f64,
pub reason_for_inclusion: String,
}
/// Reports whether a named feature was found in a ranking and whether it falls
/// inside the top-4, top-8, and top-16 cohorts, with an explanatory note per cohort.
///
/// NOTE(review): presumably one report is produced per entry of `SEED_FEATURES` — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct SeedFeatureReport {
pub feature_name: String,
pub found_in_ranking: bool,
// `rank` / `candidate_score` are optional; presumably `None` when the feature
// was not found in the ranking — confirm against the builder.
pub rank: Option<usize>,
pub candidate_score: Option<f64>,
pub in_top_4: bool,
pub in_top_8: bool,
pub in_top_16: bool,
pub top_4_note: String,
pub top_8_note: String,
pub top_16_note: String,
}
/// Serializable bundle pairing the ranking formula and the requested seed
/// feature names with the per-feature `SeedFeatureReport` results.
#[derive(Debug, Clone, Serialize)]
pub struct SeedFeatureCheckArtifact {
pub ranking_formula: String,
pub requested_seed_features: Vec<String>,
pub seed_feature_report: Vec<SeedFeatureReport>,
}
/// Cohort membership derived from a feature ranking: the top-4, top-8, and
/// top-16 member lists, the full member list, and the seed-feature report,
/// together with the formula and missingness-penalty settings that were used.
#[derive(Debug, Clone, Serialize)]
pub struct FeatureCohorts {
pub ranking_formula: String,
pub missingness_penalty_threshold: f64,
pub missingness_penalty_value: f64,
pub top_4: Vec<CohortMember>,
pub top_8: Vec<CohortMember>,
pub top_16: Vec<CohortMember>,
pub all_features: Vec<CohortMember>,
pub seed_feature_report: Vec<SeedFeatureReport>,
}
/// Flat, serializable record of the metrics for one grid point: a cohort
/// combined with one `window` / `persistence_runs` / `alert_tau` /
/// `corroborating_m` setting.
///
/// NOTE(review): the exact definition of each metric lives in the code that
/// fills this struct, which is not visible in this part of the file; the group
/// comments below are inferred from the field names.
#[derive(Debug, Clone, Serialize)]
pub struct CohortGridResult {
// --- identification of the ranking, cohort, and grid point ---
pub ranking_strategy: String,
pub ranking_formula: String,
pub grid_row_id: usize,
pub feature_trace_config_id: usize,
pub cohort_name: String,
pub cohort_size: usize,
pub window: usize,
pub persistence_runs: usize,
pub alert_tau: f64,
pub corroborating_m: usize,
pub primary_run_signal: String,
// --- failure recall, with threshold / EWMA comparisons ---
pub failure_recall: usize,
pub failure_runs: usize,
pub failure_recall_rate: f64,
pub threshold_recall: usize,
pub ewma_recall: usize,
pub failure_recall_delta_vs_threshold: i64,
pub failure_recall_delta_vs_ewma: i64,
// --- lead times (in runs) ---
pub mean_lead_time_runs: Option<f64>,
pub median_lead_time_runs: Option<f64>,
pub threshold_mean_lead_time_runs: Option<f64>,
pub ewma_mean_lead_time_runs: Option<f64>,
pub mean_lead_delta_vs_threshold_runs: Option<f64>,
pub mean_lead_delta_vs_ewma_runs: Option<f64>,
// --- nuisance on passing runs ---
pub pass_run_nuisance_proxy: f64,
pub numeric_pass_run_nuisance_proxy: f64,
pub ewma_nuisance: f64,
pub threshold_nuisance: f64,
pub pass_run_nuisance_delta_vs_ewma: f64,
pub pass_run_nuisance_delta_vs_threshold: f64,
pub pass_run_nuisance_delta_vs_numeric_dsa: f64,
// --- episode statistics and compression ---
pub raw_boundary_episode_count: usize,
pub dsa_episode_count: usize,
pub dsa_episodes_preceding_failure: usize,
pub mean_dsa_episode_length_runs: Option<f64>,
pub max_dsa_episode_length_runs: usize,
pub compression_ratio: Option<f64>,
pub precursor_quality: Option<f64>,
pub non_escalating_dsa_episode_fraction: Option<f64>,
pub feature_level_active_points: usize,
pub feature_level_alert_points: usize,
pub persistence_suppression_fraction: Option<f64>,
// --- policy vs numeric-only comparison and point counts ---
pub numeric_failure_recall: usize,
pub policy_vs_numeric_recall_delta: i64,
pub watch_point_count: usize,
pub review_point_count: usize,
pub escalate_point_count: usize,
pub investigation_point_count: usize,
pub numeric_investigation_point_count: usize,
pub silenced_point_count: usize,
pub rescued_point_count: usize,
pub rescued_watch_to_review_points: usize,
pub rescued_review_to_escalate_points: usize,
pub review_escalate_points_per_pass_run: f64,
pub numeric_alert_points_per_pass_run: f64,
pub review_escalate_episodes_per_pass_run: f64,
pub numeric_alert_episodes_per_pass_run: f64,
// --- verdict against the primary success condition ---
pub primary_success: bool,
pub primary_success_reason: String,
}
/// Per-feature summary combining the feature's rank under each ranking
/// strategy with its rescue eligibility and any policy override values.
///
/// NOTE(review): the `*_override` fields look like a flattened view of a
/// `FeaturePolicyOverride`; that type is defined in `crate::heuristics`, so
/// confirm the mapping there.
#[derive(Debug, Clone, Serialize)]
pub struct FeaturePolicySummaryRow {
pub feature_index: usize,
pub feature_name: String,
pub compression_rank: Option<usize>,
pub recall_aware_rank: Option<usize>,
pub burden_aware_rank: Option<usize>,
pub dsfb_aware_rank: Option<usize>,
pub pre_failure_run_hits: usize,
pub motif_precision_proxy: Option<f64>,
pub missing_fraction: f64,
pub rescue_eligible: bool,
pub rescue_priority: usize,
pub alert_class_override: Option<HeuristicAlertClass>,
pub requires_persistence_override: Option<bool>,
pub requires_corroboration_override: Option<bool>,
pub minimum_window_override: Option<usize>,
pub minimum_hits_override: Option<usize>,
pub maximum_allowed_fragmentation_override: Option<f64>,
pub override_reason: String,
pub allow_watch_only: Option<bool>,
pub allow_review_without_escalate: Option<bool>,
pub suppress_if_isolated: Option<bool>,
}
/// Recall, nuisance, and rescued-point counts for one ranking strategy, cohort,
/// and grid setting (`window`, `persistence_runs`, `alert_tau`, `corroborating_m`).
#[derive(Debug, Clone, Serialize)]
pub struct RecallRescueResultRow {
pub ranking_strategy: String,
pub cohort_name: String,
pub window: usize,
pub persistence_runs: usize,
pub alert_tau: f64,
pub corroborating_m: usize,
pub failure_recall: usize,
pub pass_run_nuisance_proxy: f64,
pub rescued_point_count: usize,
pub rescued_watch_to_review_points: usize,
pub rescued_review_to_escalate_points: usize,
}
/// Diagnostic record for one failure run: details of the "nearest" feature, a
/// set of boolean flags naming possible miss causes, the textual miss rule,
/// and whether the run was recovered after optimization.
///
/// NOTE(review): how the nearest feature is chosen and how each flag is set is
/// decided by code outside this part of the file — confirm there.
#[derive(Debug, Clone, Serialize)]
pub struct MissedFailureDiagnosticRow {
pub failure_run_index: usize,
pub nearest_feature_name: Option<String>,
pub nearest_feature_score: Option<f64>,
pub nearest_feature_policy_state: Option<String>,
pub nearest_feature_resolved_alert_class: Option<String>,
pub nearest_feature_boundary_density_w: Option<f64>,
pub nearest_feature_ewma_occupancy_w: Option<f64>,
pub nearest_feature_motif_recurrence_w: Option<f64>,
pub nearest_feature_fragmentation_proxy_w: Option<f64>,
pub nearest_feature_consistent: Option<bool>,
// Cause flags; presumably several may be true for the same run — confirm.
pub ranking_exclusion: bool,
pub cohort_selection: bool,
pub policy_suppression: bool,
pub fragmentation_ceiling: bool,
pub directional_consistency_gate: bool,
pub persistence_gate: bool,
pub corroboration_threshold: bool,
pub rescue_gate_not_activating: bool,
pub exact_miss_rule: String,
pub bounded_rescue_would_recover: bool,
pub recovered_after_optimization: bool,
pub optimized_feature_name: Option<String>,
}
/// Per-failure-run record of the feature considered critical for recall, with
/// its ranks, structural metrics, override status, and recovery outcome.
///
/// NOTE(review): the feature fields are optional, presumably because a failure
/// run may have no associated feature — confirm against the builder.
#[derive(Debug, Clone, Serialize)]
pub struct RecallCriticalFeatureRow {
pub failure_run_index: usize,
pub feature_index: Option<usize>,
pub feature_name: Option<String>,
pub compression_rank: Option<usize>,
pub recall_aware_rank: Option<usize>,
pub max_structural_score: Option<f64>,
pub resolved_alert_class: Option<String>,
pub policy_state: Option<String>,
pub boundary_density_w: Option<f64>,
pub ewma_occupancy_w: Option<f64>,
pub motif_recurrence_w: Option<f64>,
pub fragmentation_proxy_w: Option<f64>,
pub consistent: Option<bool>,
pub exact_miss_rule: String,
pub feature_override_exists: bool,
pub rescue_priority: Option<usize>,
pub allow_review_without_escalate: Option<bool>,
pub bounded_feature_override_would_recover: bool,
pub recovered_after_optimization: bool,
pub optimized_feature_name: Option<String>,
pub recall_rescue_contribution: f64,
}
/// Generic long-format row: a named numeric `value` tagged with the
/// configuration role and contribution type it belongs to, plus a free-text note.
#[derive(Debug, Clone, Serialize)]
pub struct PolicyContributionAnalysisRow {
pub configuration_role: String,
pub contribution_type: String,
pub name: String,
pub value: f64,
pub note: String,
}
/// Point counts attributed to one motif at one cohort grid point, split by
/// watch / review / escalate / silent-suppression and by pass vs pre-failure context.
///
/// NOTE(review): `grid_row_id` presumably matches `CohortGridResult::grid_row_id` — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct CohortMotifPolicyContributionRow {
pub grid_row_id: usize,
pub cohort_name: String,
pub cohort_size: usize,
pub window: usize,
pub persistence_runs: usize,
pub alert_tau: f64,
pub corroborating_m: usize,
pub motif_name: String,
pub alert_class_default: HeuristicAlertClass,
pub watch_points: usize,
pub review_points: usize,
pub escalate_points: usize,
pub silent_suppression_points: usize,
pub pass_review_or_escalate_points: usize,
pub pre_failure_review_or_escalate_points: usize,
}
/// Pairs a cohort name with the grid result chosen as that cohort's best row.
#[derive(Debug, Clone, Serialize)]
pub struct CohortBestRow {
pub cohort_name: String,
pub best_row: CohortGridResult,
}
/// Narrative analysis of the closest grid point: its cohort, grid point and
/// policy setting labels, its nuisance and recall next to the EWMA and
/// threshold reference values, and a set of free-text notes.
///
/// NOTE(review): presumably produced only when no grid point meets the primary
/// success condition (it is optional in `CohortDsaSummary`) — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct CohortFailureAnalysis {
pub closest_cohort: String,
pub closest_grid_point: String,
pub closest_policy_setting: String,
pub closest_nuisance: f64,
pub closest_recall: usize,
pub ewma_nuisance: f64,
pub threshold_recall: usize,
pub limiting_factor: String,
pub corroboration_effect: String,
pub policy_vs_numeric_note: String,
pub ranking_quality_note: String,
pub all_feature_dsa_vs_cohort_note: String,
pub best_near_success_source: String,
pub nuisance_motif_classes: String,
pub useful_precursor_motif_classes: String,
}
/// Summary of a whole grid search: the success definition and tolerance, how
/// many grid points were evaluated and how many succeeded, the notable rows
/// (closest to success, best success, best precursor quality), and textual
/// conclusions.
#[derive(Debug, Clone, Serialize)]
pub struct CohortGridSummary {
pub ranking_formula: String,
pub primary_success_condition_definition: String,
pub recall_tolerance_runs: usize,
pub grid_point_count: usize,
pub optimization_priority_order: Vec<String>,
pub success_row_count: usize,
pub any_success_row: bool,
pub closest_to_success: Option<CohortGridResult>,
pub best_success_row: Option<CohortGridResult>,
pub best_precursor_quality_row: Option<CohortGridResult>,
pub cross_feature_corroboration_effect: String,
pub limiting_factor: String,
}
/// Full cohort DSA summary: every grid result, the best row per cohort, the
/// notable and selected rows, an optional failure analysis, and the same
/// textual conclusions carried by `CohortGridSummary`.
#[derive(Debug, Clone, Serialize)]
pub struct CohortDsaSummary {
pub ranking_formula: String,
pub primary_success_condition: String,
pub recall_tolerance_runs: usize,
pub cohort_results: Vec<CohortGridResult>,
pub best_by_cohort: Vec<CohortBestRow>,
pub closest_to_success: Option<CohortGridResult>,
pub best_primary_success: Option<CohortGridResult>,
pub best_precursor_quality_row: Option<CohortGridResult>,
pub selected_configuration: Option<CohortGridResult>,
pub best_cohort: Option<String>,
pub any_primary_success: bool,
pub failure_analysis: Option<CohortFailureAnalysis>,
pub grid_point_count: usize,
pub optimization_priority_order: Vec<String>,
pub cross_feature_corroboration_effect: String,
pub limiting_factor: String,
}
/// In-memory result of one cohort execution: the grid summary, the full
/// summary, the per-motif policy contribution rows, and the `DsaEvaluation`
/// of the selected configuration.
///
/// Unlike the row/summary types around it, this struct does not derive `Serialize`.
#[derive(Debug, Clone)]
pub struct CohortExecution {
pub grid_summary: CohortGridSummary,
pub summary: CohortDsaSummary,
pub motif_policy_contributions: Vec<CohortMotifPolicyContributionRow>,
pub selected_evaluation: DsaEvaluation,
}
/// Aggregate of everything an optimization run produces: the baseline and
/// per-strategy rankings, cohorts, and executions, the policy overrides, the
/// candidate and Pareto row lists, the diagnostic tables, and the operator-level
/// delta assessments.
///
/// Not `Serialize` itself; it holds `CohortExecution` values, which are not
/// serializable either.
#[derive(Debug, Clone)]
pub struct OptimizationExecution {
// --- baseline ranking, cohorts, and execution ---
pub baseline_feature_ranking: Vec<FeatureRankingRow>,
pub baseline_feature_cohorts: FeatureCohorts,
pub baseline_execution: CohortExecution,
// --- alternative rankings and their cohorts ---
pub recall_aware_feature_ranking: Vec<FeatureRankingRow>,
pub burden_aware_feature_ranking: Vec<FeatureRankingRow>,
pub dsfb_aware_feature_ranking: Vec<FeatureRankingRow>,
pub ranking_comparison: Vec<FeatureRankingComparisonRow>,
pub recall_aware_feature_cohorts: FeatureCohorts,
pub burden_aware_feature_cohorts: FeatureCohorts,
pub dsfb_aware_feature_cohorts: FeatureCohorts,
// --- policy overrides and the executions run with them ---
pub feature_policy_overrides: Vec<FeaturePolicyOverride>,
pub feature_policy_summary: Vec<FeaturePolicySummaryRow>,
pub optimized_execution: CohortExecution,
pub recall_aware_execution: CohortExecution,
pub burden_aware_execution: CohortExecution,
pub dsfb_aware_execution: CohortExecution,
// --- candidate sets ---
// NOTE(review): the relationship between the stage_a/stage_b and
// stage1/stage2 lists is not visible in this part of the file — confirm.
pub pareto_frontier: Vec<CohortGridResult>,
pub stage_a_candidates: Vec<CohortGridResult>,
pub stage_b_candidates: Vec<CohortGridResult>,
pub stage1_candidates: Vec<CohortGridResult>,
pub stage2_candidates: Vec<CohortGridResult>,
// --- diagnostic tables ---
pub recall_rescue_results: Vec<RecallRescueResultRow>,
pub missed_failure_diagnostics: Vec<MissedFailureDiagnosticRow>,
pub recall_critical_features: Vec<RecallCriticalFeatureRow>,
pub policy_contribution_analysis: Vec<PolicyContributionAnalysisRow>,
// --- operator-burden baselines, targets, and assessments ---
pub operator_baselines: OperatorBaselines,
pub operator_delta_targets: OperatorDeltaTargets,
pub operator_delta_attainment_matrix: Vec<OperatorDeltaAttainmentRow>,
pub policy_operator_burden_contributions: Vec<OperatorBurdenContributionRow>,
pub recall_recovery_efficiency: Vec<RecallRecoveryEfficiencyRow>,
pub single_change_iteration_log: Vec<SingleChangeIterationRow>,
pub delta_target_assessment: DeltaTargetAssessment,
}
/// Compact description of one candidate configuration: its label, ranking
/// strategy, cohort and grid setting, with recall, nuisance, the nuisance
/// deltas against EWMA and the current DSA, lead time, precursor quality, and
/// compression ratio.
#[derive(Debug, Clone, Serialize)]
pub struct DeltaCandidateSummary {
pub configuration: String,
pub ranking_strategy: String,
pub cohort_name: String,
pub window: usize,
pub persistence_runs: usize,
pub alert_tau: f64,
pub corroborating_m: usize,
pub failure_recall: usize,
pub failure_runs: usize,
pub pass_run_nuisance_proxy: f64,
pub delta_nuisance_vs_ewma: f64,
pub delta_nuisance_vs_current_dsa: f64,
pub mean_lead_time_runs: Option<f64>,
pub precursor_quality: Option<f64>,
pub compression_ratio: Option<f64>,
}
/// Assessment of the selected configuration against the primary and secondary
/// nuisance-delta targets, including the baselines and ceilings used, the
/// pass/fail flags, and the best alternative candidates that were found.
///
/// NOTE(review): the `103` / `104` in the field names presumably refer to
/// failure-recall counts on the benchmark dataset — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct DeltaTargetAssessment {
pub primary_target_definition: String,
pub secondary_target_definition: String,
pub ewma_nuisance_baseline: f64,
pub current_policy_dsa_nuisance_baseline: f64,
pub primary_delta_target: f64,
pub secondary_delta_target: f64,
pub primary_target_nuisance_ceiling: f64,
pub secondary_target_nuisance_ceiling: f64,
pub selected_configuration: DeltaCandidateSummary,
pub primary_target_met: bool,
pub ideal_target_met: bool,
pub secondary_target_met: bool,
pub mean_lead_time_ge_ewma: bool,
pub mean_lead_time_ge_threshold: bool,
pub best_recall_103_candidate: Option<DeltaCandidateSummary>,
pub best_recall_104_candidate: Option<DeltaCandidateSummary>,
pub best_secondary_target_candidate: Option<DeltaCandidateSummary>,
pub best_stage_a_delta_candidate: Option<DeltaCandidateSummary>,
pub best_reachable_pareto_point: DeltaCandidateSummary,
pub assessment_note: String,
}
/// Operator-burden metrics for one named baseline layer: investigation points,
/// episode count, per-pass-run review/escalate rates, precursor quality,
/// recall, and pass-run nuisance.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorBaselineLayer {
pub name: String,
pub investigation_points: usize,
pub episode_count: usize,
pub review_escalate_points_per_pass_run: f64,
pub review_escalate_episodes_per_pass_run: f64,
pub precursor_quality: Option<f64>,
pub recall: usize,
pub pass_run_nuisance_proxy: f64,
}
/// The baseline values that operator deltas are measured against, the names of
/// the layers each baseline was taken from, and the full metrics for the
/// numeric-only DSA, current policy DSA, and raw boundary layers.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorBaselines {
pub investigation_baseline_layer: String,
pub episode_baseline_layer: String,
pub review_burden_baseline_layer: String,
pub baseline_investigation_points: usize,
pub baseline_episode_count: usize,
pub baseline_review_escalate_points_per_pass_run: f64,
pub baseline_review_escalate_episodes_per_pass_run: f64,
pub baseline_precursor_quality: Option<f64>,
pub baseline_recall: usize,
pub numeric_only_dsa: OperatorBaselineLayer,
pub current_policy_dsa: OperatorBaselineLayer,
pub raw_boundary: OperatorBaselineLayer,
}
/// Baseline vs optimized operator-burden figures for the selected
/// configuration, the resulting deltas, and the recall, nuisance, and lead-time
/// comparisons against the threshold and EWMA baselines.
///
/// NOTE(review): whether the `delta_*` fields are relative reductions or
/// absolute differences is determined by the code that fills them, which is not
/// visible here — confirm before interpreting the values.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorDeltaTargets {
pub primary_success_definition: String,
pub recall_tolerance_runs: usize,
pub selected_configuration: DeltaCandidateSummary,
pub baseline_investigation_points: usize,
pub baseline_episode_count: usize,
pub baseline_review_points_per_pass_run: f64,
pub baseline_review_episodes_per_pass_run: f64,
pub optimized_review_escalate_points: usize,
pub optimized_episode_count: usize,
pub optimized_review_points_per_pass_run: f64,
pub optimized_review_episodes_per_pass_run: f64,
pub delta_investigation_load: f64,
pub delta_episode_count: f64,
pub delta_review_points_per_pass_run: f64,
pub delta_review_episodes_per_pass_run: f64,
pub precursor_quality_status: String,
pub recall_equals_threshold: bool,
pub recall_within_tolerance: bool,
pub recall_ge_103: bool,
pub recall_eq_104: bool,
pub delta_nuisance_vs_ewma: f64,
pub delta_nuisance_vs_threshold: f64,
pub mean_lead_delta_vs_ewma: Option<f64>,
pub mean_lead_delta_vs_threshold: Option<f64>,
pub median_lead_delta_vs_ewma: Option<f64>,
pub median_lead_delta_vs_threshold: Option<f64>,
pub stable_precursor_lead_time_delta: Option<f64>,
}
/// One row of the operator delta attainment matrix: a configuration's deltas
/// and headline metrics, followed by boolean flags for targets A through I.
///
/// NOTE(review): the numeric suffixes in the flag names (`_040`, `_025`,
/// `_015`) presumably encode thresholds of 0.40, 0.25, and 0.15, and `103` /
/// `104` recall counts — confirm against the code that sets the flags.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorDeltaAttainmentRow {
pub configuration_role: String,
pub configuration: String,
pub delta_investigation_load: f64,
pub delta_episode_count: f64,
pub delta_review_points_per_pass_run: f64,
pub delta_review_episodes_per_pass_run: f64,
pub precursor_quality_status: String,
pub recall: usize,
pub mean_lead_time_runs: Option<f64>,
pub delta_nuisance_vs_ewma: f64,
pub target_a_investigation_load_ge_040: bool,
pub target_b_episode_count_ge_040: bool,
pub target_c_review_points_per_pass_run_ge_040: bool,
pub target_d_review_episodes_per_pass_run_ge_040: bool,
pub target_e_precursor_quality_preserved_or_improved: bool,
pub target_f_recall_ge_103: bool,
pub target_g_recall_eq_104: bool,
pub target_h_nuisance_ge_015: bool,
pub target_h_nuisance_ge_025: bool,
pub target_h_nuisance_ge_040: bool,
// Optional, unlike the other target flags; presumably `None` when the lead
// comparison cannot be evaluated — confirm.
pub target_i_stable_precursor_lead_improved: Option<bool>,
}
/// Generic long-format row for operator-burden contributions: a named numeric
/// `value` tagged with configuration role, scope, and contribution type, plus
/// a free-text note.
#[derive(Debug, Clone, Serialize)]
pub struct OperatorBurdenContributionRow {
pub configuration_role: String,
pub contribution_scope: String,
pub name: String,
pub contribution_type: String,
pub value: f64,
pub note: String,
}
/// Compares an optimized configuration with a baseline: failures recovered,
/// the review/episode/nuisance burden added, and the recovered-failures-per-
/// added-burden ratios.
///
/// NOTE(review): the ratio fields are optional, presumably `None` when the
/// denominator is zero; `failure_run_index` is presumably `None` on aggregate
/// rows — confirm both against the builder.
#[derive(Debug, Clone, Serialize)]
pub struct RecallRecoveryEfficiencyRow {
pub failure_run_index: Option<usize>,
pub baseline_configuration: String,
pub optimized_configuration: String,
pub recovered_failures: i64,
pub added_review_escalate_points: i64,
pub added_episode_count: i64,
pub added_review_points_per_pass_run: f64,
pub added_review_episodes_per_pass_run: f64,
pub added_nuisance_runs: i64,
pub recovered_failures_per_added_review_escalate_point: Option<f64>,
pub recovered_failures_per_added_episode: Option<f64>,
pub recovered_failures_per_added_pass_run_burden: Option<f64>,
pub recovered_failures_per_added_nuisance_run: Option<f64>,
}
/// Log entry for one iteration of a single-change search: what was changed and
/// why, whether the change was accepted, the resulting metrics, and the deltas
/// of those metrics.
///
/// NOTE(review): the reference point for the `delta_*` fields (previous
/// accepted state vs. original baseline) is not visible here — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct SingleChangeIterationRow {
pub iteration: usize,
pub change_kind: String,
pub change_target: String,
pub reason: String,
pub derived_from_failures: String,
pub targets_nuisance_class: String,
pub affected_failures: String,
pub accepted: bool,
pub recall: usize,
pub investigation_points: usize,
pub episode_count: usize,
pub precursor_quality: Option<f64>,
pub pass_run_nuisance_proxy: f64,
pub delta_recall: i64,
pub delta_investigation_points: i64,
pub delta_episode_count: i64,
pub delta_precursor_quality: Option<f64>,
pub delta_pass_run_nuisance_proxy: f64,
}
/// Textual forecast for one rating category: the current value, the forecast
/// value, and the justification, all stored as strings.
#[derive(Debug, Clone, Serialize)]
pub struct CategoryForecast {
pub category: String,
pub current: String,
pub forecast: String,
pub justification: String,
}
/// Metrics quoted in support of a rating forecast: DSA nuisance, recall, lead
/// time, precursor quality, and compression for the chosen configuration, next
/// to the EWMA, threshold, and all-feature DSA reference values.
#[derive(Debug, Clone, Serialize)]
pub struct ForecastSupportingMetrics {
pub chosen_configuration: String,
pub dsa_nuisance: f64,
pub ewma_nuisance: f64,
pub dsa_recall: usize,
pub threshold_recall: usize,
pub recall_tolerance_runs: usize,
pub dsa_mean_lead_time_runs: Option<f64>,
pub ewma_mean_lead_time_runs: Option<f64>,
pub threshold_mean_lead_time_runs: Option<f64>,
pub dsa_precursor_quality: Option<f64>,
pub all_feature_dsa_precursor_quality: Option<f64>,
pub dsa_compression_ratio: Option<f64>,
pub all_feature_dsa_compression_ratio: Option<f64>,
}
/// Rating forecast for the chosen configuration: the baseline score, the
/// success flags for the primary and secondary conditions, the scores forecast
/// for each outcome, the score actually achieved, and the supporting detail.
///
/// NOTE(review): the score fields presumably take their values from the
/// `CURRENT_BASELINE_SCORE` / `FORECAST_*` constants — confirm against the builder.
#[derive(Debug, Clone, Serialize)]
pub struct RatingDeltaForecast {
pub current_baseline_score: f64,
pub primary_success_condition: String,
pub recall_tolerance_runs: usize,
pub chosen_configuration: String,
pub primary_success_met: bool,
pub secondary_targets_met: bool,
pub secondary_lead_time_vs_ewma: bool,
pub secondary_lead_time_vs_threshold: bool,
pub secondary_precursor_quality_vs_all_feature_dsa: Option<bool>,
pub secondary_compression_material: Option<bool>,
pub forecast_score_if_primary_success_only: f64,
pub forecast_score_if_primary_plus_secondary_success: f64,
pub achieved_forecast_score: f64,
pub forecast_justification: String,
pub category_forecasts: Vec<CategoryForecast>,
pub supporting_metrics: ForecastSupportingMetrics,
}
/// Analysis of the closest configuration against the rating target: its DSA
/// nuisance and recall next to the EWMA and threshold references, the nuisance
/// and recall gaps, and a textual limiting factor.
///
/// NOTE(review): the sign convention of `nuisance_gap` and `recall_gap_runs` is
/// set by the code that fills them, which is not visible here — confirm.
#[derive(Debug, Clone, Serialize)]
pub struct RatingDeltaFailureAnalysis {
pub closest_configuration: String,
pub dsa_nuisance: f64,
pub ewma_nuisance: f64,
pub dsa_recall: usize,
pub threshold_recall: usize,
pub recall_tolerance_runs: usize,
pub nuisance_gap: f64,
pub recall_gap_runs: i64,
pub nuisance_missed_by: String,
pub recall_preserved: bool,
pub limiting_factor: String,
}
/// Ranks every analyzable feature using the compression-biased score.
///
/// The raw boundary, raw violation, and EWMA alarm counts are each z-scored
/// across the analyzable features. A feature's score is
/// `z_boundary - z_violation + z_ewma`, minus `MISSINGNESS_PENALTY_VALUE` when
/// its `missing_fraction` is strictly greater than
/// `MISSINGNESS_PENALTY_THRESHOLD`.
///
/// Rows are returned in descending score order with `rank` numbered from 1.
/// Equal scores, and scores that cannot be compared, are ordered by ascending
/// `feature_name`. Returns an empty vector when no feature is analyzable.
pub fn compute_feature_ranking(metrics: &BenchmarkMetrics) -> Vec<FeatureRankingRow> {
    let candidates: Vec<_> = metrics
        .feature_metrics
        .iter()
        .filter(|candidate| candidate.analyzable)
        .collect();
    if candidates.is_empty() {
        return Vec::new();
    }

    // Per-signal samples over the analyzable features; these define the
    // mean/std used for the z-scores below.
    let boundary_counts: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.dsfb_raw_boundary_points as f64)
        .collect();
    let violation_counts: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.dsfb_raw_violation_points as f64)
        .collect();
    let ewma_counts: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.ewma_alarm_points as f64)
        .collect();
    let (boundary_mean, boundary_std) = mean_std(&boundary_counts);
    let (violation_mean, violation_std) = mean_std(&violation_counts);
    let (ewma_mean, ewma_std) = mean_std(&ewma_counts);

    let mut rows: Vec<FeatureRankingRow> = Vec::with_capacity(candidates.len());
    for feature in candidates.iter().copied() {
        let z_boundary = z_score(
            feature.dsfb_raw_boundary_points as f64,
            boundary_mean,
            boundary_std,
        );
        let z_violation = z_score(
            feature.dsfb_raw_violation_points as f64,
            violation_mean,
            violation_std,
        );
        let z_ewma = z_score(feature.ewma_alarm_points as f64, ewma_mean, ewma_std);

        let over_missingness_limit = feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD;
        let missingness_penalty = if over_missingness_limit {
            MISSINGNESS_PENALTY_VALUE
        } else {
            0.0
        };
        let candidate_score = z_boundary - z_violation + z_ewma - missingness_penalty;
        let score_breakdown = format!(
            "{:+.4} boundary - {:+.4} violation + {:+.4} ewma - {:.1} missingness",
            z_boundary, z_violation, z_ewma, missingness_penalty
        );

        rows.push(FeatureRankingRow {
            ranking_strategy: "compression_biased".into(),
            ranking_formula: RANKING_FORMULA.into(),
            feature_index: feature.feature_index,
            feature_name: feature.feature_name.clone(),
            dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
            dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
            dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
            dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
            ewma_alarm_points: feature.ewma_alarm_points,
            threshold_alarm_points: feature.threshold_alarm_points,
            pre_failure_run_hits: feature.pre_failure_run_hits,
            motif_precision_proxy: feature.motif_precision_proxy,
            // Terms that this strategy's formula does not use stay unset.
            recall_rescue_contribution: None,
            operator_burden_contribution: None,
            semantic_persistence_contribution: None,
            grouped_semantic_support: None,
            violation_overdominance_penalty: None,
            missing_fraction: feature.missing_fraction,
            z_pre_failure_run_hits: None,
            z_motif_precision_proxy: None,
            z_recall_rescue_contribution: None,
            z_operator_burden_contribution: None,
            z_semantic_persistence_contribution: None,
            z_grouped_semantic_support: None,
            z_violation_overdominance_penalty: None,
            z_boundary,
            z_violation,
            z_ewma,
            missingness_penalty,
            candidate_score,
            score_breakdown,
            // Placeholder; the real rank is assigned after sorting.
            rank: 0,
        });
    }

    // Highest score first; ties and incomparable scores fall back to the name.
    rows.sort_by(|lhs, rhs| {
        match rhs.candidate_score.partial_cmp(&lhs.candidate_score) {
            Some(Ordering::Less) => Ordering::Less,
            Some(Ordering::Greater) => Ordering::Greater,
            Some(Ordering::Equal) | None => lhs.feature_name.cmp(&rhs.feature_name),
        }
    });
    rows.iter_mut()
        .zip(1usize..)
        .for_each(|(row, position)| row.rank = position);
    rows
}
/// Ranks every analyzable feature using the recall-aware score.
///
/// Six signals are z-scored across the analyzable features: pre-failure run
/// hits, motif precision proxy (a missing value counts as 0.0), EWMA alarm
/// points, raw boundary points, raw violation points, and the feature's entry
/// in `recall_rescue_contributions` (keyed by `feature_index`, 0.0 when
/// absent). The score is
/// `z_pre_failure + z_motif_precision + z_ewma + 0.5 * z_boundary
/// + 0.5 * z_recall_rescue - 0.5 * z_violation`, minus
/// `MISSINGNESS_PENALTY_VALUE` when `missing_fraction` is strictly greater
/// than `MISSINGNESS_PENALTY_THRESHOLD`.
///
/// Rows are returned in descending score order with `rank` numbered from 1.
/// Equal scores, and scores that cannot be compared, are ordered by ascending
/// `feature_name`. Returns an empty vector when no feature is analyzable.
pub fn compute_feature_ranking_recall_aware(
    metrics: &BenchmarkMetrics,
    recall_rescue_contributions: &BTreeMap<usize, f64>,
) -> Vec<FeatureRankingRow> {
    let candidates: Vec<_> = metrics
        .feature_metrics
        .iter()
        .filter(|candidate| candidate.analyzable)
        .collect();
    if candidates.is_empty() {
        return Vec::new();
    }

    // Per-signal samples over the analyzable features.
    let pre_failure_hits: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.pre_failure_run_hits as f64)
        .collect();
    let motif_precisions: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.motif_precision_proxy.unwrap_or(0.0))
        .collect();
    let ewma_counts: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.ewma_alarm_points as f64)
        .collect();
    let boundary_counts: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.dsfb_raw_boundary_points as f64)
        .collect();
    let violation_counts: Vec<f64> = candidates
        .iter()
        .map(|candidate| candidate.dsfb_raw_violation_points as f64)
        .collect();
    // Looked up once per feature; the same values are reused for the rows below.
    let rescue_contributions: Vec<f64> = candidates
        .iter()
        .map(|candidate| {
            recall_rescue_contributions
                .get(&candidate.feature_index)
                .map_or(0.0, |value| *value)
        })
        .collect();

    let (pre_failure_mean, pre_failure_std) = mean_std(&pre_failure_hits);
    let (motif_precision_mean, motif_precision_std) = mean_std(&motif_precisions);
    let (ewma_mean, ewma_std) = mean_std(&ewma_counts);
    let (boundary_mean, boundary_std) = mean_std(&boundary_counts);
    let (violation_mean, violation_std) = mean_std(&violation_counts);
    let (recall_rescue_mean, recall_rescue_std) = mean_std(&rescue_contributions);

    let mut rows: Vec<FeatureRankingRow> = Vec::with_capacity(candidates.len());
    for (feature, &recall_rescue_contribution) in
        candidates.iter().copied().zip(rescue_contributions.iter())
    {
        let z_pre_failure_run_hits = z_score(
            feature.pre_failure_run_hits as f64,
            pre_failure_mean,
            pre_failure_std,
        );
        let z_motif_precision_proxy = z_score(
            feature.motif_precision_proxy.unwrap_or(0.0),
            motif_precision_mean,
            motif_precision_std,
        );
        let z_ewma = z_score(feature.ewma_alarm_points as f64, ewma_mean, ewma_std);
        let z_boundary = z_score(
            feature.dsfb_raw_boundary_points as f64,
            boundary_mean,
            boundary_std,
        );
        let z_violation = z_score(
            feature.dsfb_raw_violation_points as f64,
            violation_mean,
            violation_std,
        );
        let z_recall_rescue_contribution = z_score(
            recall_rescue_contribution,
            recall_rescue_mean,
            recall_rescue_std,
        );

        let over_missingness_limit = feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD;
        let missingness_penalty = if over_missingness_limit {
            MISSINGNESS_PENALTY_VALUE
        } else {
            0.0
        };
        let candidate_score = z_pre_failure_run_hits
            + z_motif_precision_proxy
            + z_ewma
            + 0.5 * z_boundary
            + 0.5 * z_recall_rescue_contribution
            - 0.5 * z_violation
            - missingness_penalty;
        let score_breakdown = format!(
            "{:+.4} pre_failure + {:+.4} motif_precision + {:+.4} ewma + 0.5*{:+.4} boundary + 0.5*{:+.4} recall_rescue - 0.5*{:+.4} violation - {:.1} missingness",
            z_pre_failure_run_hits,
            z_motif_precision_proxy,
            z_ewma,
            z_boundary,
            z_recall_rescue_contribution,
            z_violation,
            missingness_penalty
        );

        rows.push(FeatureRankingRow {
            ranking_strategy: "recall_aware".into(),
            ranking_formula: RECALL_AWARE_RANKING_FORMULA.into(),
            feature_index: feature.feature_index,
            feature_name: feature.feature_name.clone(),
            dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
            dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
            dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
            dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
            ewma_alarm_points: feature.ewma_alarm_points,
            threshold_alarm_points: feature.threshold_alarm_points,
            pre_failure_run_hits: feature.pre_failure_run_hits,
            motif_precision_proxy: feature.motif_precision_proxy,
            recall_rescue_contribution: Some(recall_rescue_contribution),
            // Terms that this strategy's formula does not use stay unset.
            operator_burden_contribution: None,
            semantic_persistence_contribution: None,
            grouped_semantic_support: None,
            violation_overdominance_penalty: None,
            missing_fraction: feature.missing_fraction,
            z_pre_failure_run_hits: Some(z_pre_failure_run_hits),
            z_motif_precision_proxy: Some(z_motif_precision_proxy),
            z_recall_rescue_contribution: Some(z_recall_rescue_contribution),
            z_operator_burden_contribution: None,
            z_semantic_persistence_contribution: None,
            z_grouped_semantic_support: None,
            z_violation_overdominance_penalty: None,
            z_boundary,
            z_violation,
            z_ewma,
            missingness_penalty,
            candidate_score,
            score_breakdown,
            // Placeholder; the real rank is assigned after sorting.
            rank: 0,
        });
    }

    // Highest score first; ties and incomparable scores fall back to the name.
    rows.sort_by(|lhs, rhs| {
        match rhs.candidate_score.partial_cmp(&lhs.candidate_score) {
            Some(Ordering::Less) => Ordering::Less,
            Some(Ordering::Greater) => Ordering::Greater,
            Some(Ordering::Equal) | None => lhs.feature_name.cmp(&rhs.feature_name),
        }
    });
    rows.iter_mut()
        .zip(1usize..)
        .for_each(|(row, position)| row.rank = position);
    rows
}
/// Ranks analyzable features with the burden-aware scoring formula.
///
/// Only entries of `metrics.feature_metrics` whose `analyzable` flag is set are
/// scored. Each input column is standardised with `mean_std` / `z_score` across
/// those features and combined as recorded in `BURDEN_AWARE_RANKING_FORMULA`:
///
/// ```text
/// z(pre_failure_run_hits) + z(motif_precision_proxy)
///   + 0.5 * z(dsfb_raw_boundary_points) + 0.5 * z(recall_rescue_contribution)
///   - z(operator_burden_contribution) - 0.5 * z(dsfb_raw_violation_points)
///   - missingness_penalty
/// ```
///
/// # Arguments
/// * `metrics` - benchmark metrics supplying the per-feature inputs.
/// * `recall_rescue_contributions` - contribution keyed by `feature_index`;
///   features without an entry are treated as `0.0`.
/// * `operator_burden_contributions` - contribution keyed by `feature_index`;
///   features without an entry are treated as `0.0`.
///
/// # Returns
/// One row per analyzable feature, ordered by descending `candidate_score` with
/// ties broken by ascending `feature_name`; `rank` is 1-based. The result is
/// empty when no feature is analyzable. `z_ewma` is stored as `0.0` because
/// this formula has no EWMA term.
///
/// A NaN `candidate_score` is ordered after every comparable score, so the sort
/// comparator is a consistent total order regardless of the inputs.
pub fn compute_feature_ranking_burden_aware(
    metrics: &BenchmarkMetrics,
    recall_rescue_contributions: &BTreeMap<usize, f64>,
    operator_burden_contributions: &BTreeMap<usize, f64>,
) -> Vec<FeatureRankingRow> {
    // Contribution stored for `feature_index`, or 0.0 when the map has no entry.
    fn contribution_or_zero(by_feature: &BTreeMap<usize, f64>, feature_index: usize) -> f64 {
        by_feature.get(&feature_index).copied().unwrap_or(0.0)
    }

    // Highest score first. Comparable scores keep their `partial_cmp` order;
    // a NaN score sorts after every comparable score and ties with other NaNs.
    fn by_score_descending(left: f64, right: f64) -> Ordering {
        right
            .partial_cmp(&left)
            .unwrap_or_else(|| left.is_nan().cmp(&right.is_nan()))
    }

    let analyzable = metrics
        .feature_metrics
        .iter()
        .filter(|feature| feature.analyzable)
        .collect::<Vec<_>>();
    if analyzable.is_empty() {
        return Vec::new();
    }

    // Raw columns over the analyzable features; a missing motif precision
    // proxy is scored as 0.0.
    let pre_failure_values = analyzable
        .iter()
        .map(|feature| feature.pre_failure_run_hits as f64)
        .collect::<Vec<_>>();
    let motif_precision_values = analyzable
        .iter()
        .map(|feature| feature.motif_precision_proxy.unwrap_or(0.0))
        .collect::<Vec<_>>();
    let boundary_values = analyzable
        .iter()
        .map(|feature| feature.dsfb_raw_boundary_points as f64)
        .collect::<Vec<_>>();
    let violation_values = analyzable
        .iter()
        .map(|feature| feature.dsfb_raw_violation_points as f64)
        .collect::<Vec<_>>();
    let recall_rescue_values = analyzable
        .iter()
        .map(|feature| contribution_or_zero(recall_rescue_contributions, feature.feature_index))
        .collect::<Vec<_>>();
    let operator_burden_values = analyzable
        .iter()
        .map(|feature| contribution_or_zero(operator_burden_contributions, feature.feature_index))
        .collect::<Vec<_>>();

    let (pre_failure_mean, pre_failure_std) = mean_std(&pre_failure_values);
    let (motif_precision_mean, motif_precision_std) = mean_std(&motif_precision_values);
    let (boundary_mean, boundary_std) = mean_std(&boundary_values);
    let (violation_mean, violation_std) = mean_std(&violation_values);
    let (recall_rescue_mean, recall_rescue_std) = mean_std(&recall_rescue_values);
    let (operator_burden_mean, operator_burden_std) = mean_std(&operator_burden_values);

    let mut ranking = analyzable
        .iter()
        .map(|feature| {
            let recall_rescue_contribution =
                contribution_or_zero(recall_rescue_contributions, feature.feature_index);
            let operator_burden_contribution =
                contribution_or_zero(operator_burden_contributions, feature.feature_index);

            let z_pre_failure_run_hits = z_score(
                feature.pre_failure_run_hits as f64,
                pre_failure_mean,
                pre_failure_std,
            );
            let z_motif_precision_proxy = z_score(
                feature.motif_precision_proxy.unwrap_or(0.0),
                motif_precision_mean,
                motif_precision_std,
            );
            let z_boundary = z_score(
                feature.dsfb_raw_boundary_points as f64,
                boundary_mean,
                boundary_std,
            );
            let z_violation = z_score(
                feature.dsfb_raw_violation_points as f64,
                violation_mean,
                violation_std,
            );
            let z_recall_rescue_contribution = z_score(
                recall_rescue_contribution,
                recall_rescue_mean,
                recall_rescue_std,
            );
            let z_operator_burden_contribution = z_score(
                operator_burden_contribution,
                operator_burden_mean,
                operator_burden_std,
            );

            // Flat penalty once the missing fraction exceeds the threshold.
            let missingness_penalty = if feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD
            {
                MISSINGNESS_PENALTY_VALUE
            } else {
                0.0
            };

            let candidate_score = z_pre_failure_run_hits
                + z_motif_precision_proxy
                + 0.5 * z_boundary
                + 0.5 * z_recall_rescue_contribution
                - z_operator_burden_contribution
                - 0.5 * z_violation
                - missingness_penalty;

            FeatureRankingRow {
                ranking_strategy: "burden_aware".into(),
                ranking_formula: BURDEN_AWARE_RANKING_FORMULA.into(),
                feature_index: feature.feature_index,
                feature_name: feature.feature_name.clone(),
                dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
                dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
                dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
                dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
                ewma_alarm_points: feature.ewma_alarm_points,
                threshold_alarm_points: feature.threshold_alarm_points,
                pre_failure_run_hits: feature.pre_failure_run_hits,
                motif_precision_proxy: feature.motif_precision_proxy,
                recall_rescue_contribution: Some(recall_rescue_contribution),
                operator_burden_contribution: Some(operator_burden_contribution),
                semantic_persistence_contribution: None,
                grouped_semantic_support: None,
                violation_overdominance_penalty: None,
                missing_fraction: feature.missing_fraction,
                z_pre_failure_run_hits: Some(z_pre_failure_run_hits),
                z_motif_precision_proxy: Some(z_motif_precision_proxy),
                z_recall_rescue_contribution: Some(z_recall_rescue_contribution),
                z_operator_burden_contribution: Some(z_operator_burden_contribution),
                z_semantic_persistence_contribution: None,
                z_grouped_semantic_support: None,
                z_violation_overdominance_penalty: None,
                z_boundary,
                z_violation,
                // The burden-aware formula has no EWMA term.
                z_ewma: 0.0,
                missingness_penalty,
                candidate_score,
                score_breakdown: format!(
                    "{:+.4} pre_failure + {:+.4} motif_precision + 0.5*{:+.4} boundary + 0.5*{:+.4} recall_rescue - {:+.4} burden - 0.5*{:+.4} violation - {:.1} missingness",
                    z_pre_failure_run_hits,
                    z_motif_precision_proxy,
                    z_boundary,
                    z_recall_rescue_contribution,
                    z_operator_burden_contribution,
                    z_violation,
                    missingness_penalty
                ),
                // Assigned after sorting.
                rank: 0,
            }
        })
        .collect::<Vec<_>>();

    ranking.sort_by(|left, right| {
        by_score_descending(left.candidate_score, right.candidate_score)
            .then_with(|| left.feature_name.cmp(&right.feature_name))
    });
    for (position, row) in ranking.iter_mut().enumerate() {
        row.rank = position + 1;
    }
    ranking
}
/// Ranks analyzable features with the DSFB-aware scoring formula.
///
/// Only entries of `metrics.feature_metrics` whose `analyzable` flag is set are
/// scored. Each input column is standardised with `mean_std` / `z_score` across
/// those features and combined as recorded in `DSFB_AWARE_RANKING_FORMULA`:
///
/// ```text
/// z(pre_failure_run_hits) + z(motif_precision_proxy)
///   + 0.5 * z(recall_rescue_contribution)
///   + 0.5 * z(semantic_persistence_contribution)
///   + 0.5 * z(grouped_semantic_support)
///   + 0.25 * z(dsfb_raw_boundary_points)
///   - z(operator_burden_contribution)
///   - 0.5 * z(violation_overdominance_penalty)
///   - missingness_penalty
/// ```
///
/// `violation_overdominance_penalty` is the raw violation count divided by the
/// raw boundary count, with the divisor clamped to at least 1.
///
/// # Arguments
/// * `metrics` - benchmark metrics supplying the per-feature inputs.
/// * `recall_rescue_contributions` / `operator_burden_contributions` -
///   contributions keyed by `feature_index`; absent entries count as `0.0`.
/// * `semantic_layer` - source of the per-feature semantic persistence
///   contribution (via `semantic_persistence_contribution_by_feature`).
/// * `scaffold_semiotics` - source of the per-feature grouped semantic support
///   (via `grouped_semantic_support_by_feature`).
///
/// # Returns
/// One row per analyzable feature, ordered by descending `candidate_score` with
/// ties broken by ascending `feature_name`; `rank` is 1-based. The result is
/// empty when no feature is analyzable. `z_violation` and `z_ewma` are stored
/// as `0.0` because this formula uses neither term directly.
///
/// A NaN `candidate_score` is ordered after every comparable score, so the sort
/// comparator is a consistent total order regardless of the inputs.
pub fn compute_feature_ranking_dsfb_aware(
    metrics: &BenchmarkMetrics,
    recall_rescue_contributions: &BTreeMap<usize, f64>,
    operator_burden_contributions: &BTreeMap<usize, f64>,
    semantic_layer: &SemanticLayer,
    scaffold_semiotics: &ScaffoldSemioticsArtifacts,
) -> Vec<FeatureRankingRow> {
    // Contribution stored for `feature_index`, or 0.0 when the map has no entry.
    fn contribution_or_zero(by_feature: &BTreeMap<usize, f64>, feature_index: usize) -> f64 {
        by_feature.get(&feature_index).copied().unwrap_or(0.0)
    }

    // Highest score first. Comparable scores keep their `partial_cmp` order;
    // a NaN score sorts after every comparable score and ties with other NaNs.
    fn by_score_descending(left: f64, right: f64) -> Ordering {
        right
            .partial_cmp(&left)
            .unwrap_or_else(|| left.is_nan().cmp(&right.is_nan()))
    }

    let analyzable = metrics
        .feature_metrics
        .iter()
        .filter(|feature| feature.analyzable)
        .collect::<Vec<_>>();
    if analyzable.is_empty() {
        return Vec::new();
    }

    let semantic_persistence_by_feature =
        semantic_persistence_contribution_by_feature(semantic_layer);
    let grouped_support_by_feature = grouped_semantic_support_by_feature(scaffold_semiotics);

    // Raw columns over the analyzable features; a missing motif precision
    // proxy is scored as 0.0.
    let pre_failure_values = analyzable
        .iter()
        .map(|feature| feature.pre_failure_run_hits as f64)
        .collect::<Vec<_>>();
    let motif_precision_values = analyzable
        .iter()
        .map(|feature| feature.motif_precision_proxy.unwrap_or(0.0))
        .collect::<Vec<_>>();
    let boundary_values = analyzable
        .iter()
        .map(|feature| feature.dsfb_raw_boundary_points as f64)
        .collect::<Vec<_>>();
    let recall_rescue_values = analyzable
        .iter()
        .map(|feature| contribution_or_zero(recall_rescue_contributions, feature.feature_index))
        .collect::<Vec<_>>();
    let operator_burden_values = analyzable
        .iter()
        .map(|feature| contribution_or_zero(operator_burden_contributions, feature.feature_index))
        .collect::<Vec<_>>();
    let semantic_persistence_values = analyzable
        .iter()
        .map(|feature| {
            contribution_or_zero(&semantic_persistence_by_feature, feature.feature_index)
        })
        .collect::<Vec<_>>();
    let grouped_support_values = analyzable
        .iter()
        .map(|feature| contribution_or_zero(&grouped_support_by_feature, feature.feature_index))
        .collect::<Vec<_>>();
    let violation_overdominance_values = analyzable
        .iter()
        .map(|feature| {
            feature.dsfb_raw_violation_points as f64
                / feature.dsfb_raw_boundary_points.max(1) as f64
        })
        .collect::<Vec<_>>();

    let (pre_failure_mean, pre_failure_std) = mean_std(&pre_failure_values);
    let (motif_precision_mean, motif_precision_std) = mean_std(&motif_precision_values);
    let (boundary_mean, boundary_std) = mean_std(&boundary_values);
    let (recall_rescue_mean, recall_rescue_std) = mean_std(&recall_rescue_values);
    let (operator_burden_mean, operator_burden_std) = mean_std(&operator_burden_values);
    let (semantic_persistence_mean, semantic_persistence_std) =
        mean_std(&semantic_persistence_values);
    let (grouped_support_mean, grouped_support_std) = mean_std(&grouped_support_values);
    let (violation_overdominance_mean, violation_overdominance_std) =
        mean_std(&violation_overdominance_values);

    let mut ranking = analyzable
        .iter()
        .map(|feature| {
            let recall_rescue_contribution =
                contribution_or_zero(recall_rescue_contributions, feature.feature_index);
            let operator_burden_contribution =
                contribution_or_zero(operator_burden_contributions, feature.feature_index);
            let semantic_persistence_contribution =
                contribution_or_zero(&semantic_persistence_by_feature, feature.feature_index);
            let grouped_semantic_support =
                contribution_or_zero(&grouped_support_by_feature, feature.feature_index);
            // Divisor is clamped to 1 so a feature with no boundary points
            // does not divide by zero.
            let violation_overdominance_penalty = feature.dsfb_raw_violation_points as f64
                / feature.dsfb_raw_boundary_points.max(1) as f64;

            let z_pre_failure_run_hits = z_score(
                feature.pre_failure_run_hits as f64,
                pre_failure_mean,
                pre_failure_std,
            );
            let z_motif_precision_proxy = z_score(
                feature.motif_precision_proxy.unwrap_or(0.0),
                motif_precision_mean,
                motif_precision_std,
            );
            let z_boundary = z_score(
                feature.dsfb_raw_boundary_points as f64,
                boundary_mean,
                boundary_std,
            );
            let z_recall_rescue_contribution = z_score(
                recall_rescue_contribution,
                recall_rescue_mean,
                recall_rescue_std,
            );
            let z_operator_burden_contribution = z_score(
                operator_burden_contribution,
                operator_burden_mean,
                operator_burden_std,
            );
            let z_semantic_persistence_contribution = z_score(
                semantic_persistence_contribution,
                semantic_persistence_mean,
                semantic_persistence_std,
            );
            let z_grouped_semantic_support = z_score(
                grouped_semantic_support,
                grouped_support_mean,
                grouped_support_std,
            );
            let z_violation_overdominance_penalty = z_score(
                violation_overdominance_penalty,
                violation_overdominance_mean,
                violation_overdominance_std,
            );

            // Flat penalty once the missing fraction exceeds the threshold.
            let missingness_penalty = if feature.missing_fraction > MISSINGNESS_PENALTY_THRESHOLD
            {
                MISSINGNESS_PENALTY_VALUE
            } else {
                0.0
            };

            let candidate_score = z_pre_failure_run_hits
                + z_motif_precision_proxy
                + 0.5 * z_recall_rescue_contribution
                + 0.5 * z_semantic_persistence_contribution
                + 0.5 * z_grouped_semantic_support
                + 0.25 * z_boundary
                - z_operator_burden_contribution
                - 0.5 * z_violation_overdominance_penalty
                - missingness_penalty;

            FeatureRankingRow {
                ranking_strategy: "dsfb_aware".into(),
                ranking_formula: DSFB_AWARE_RANKING_FORMULA.into(),
                feature_index: feature.feature_index,
                feature_name: feature.feature_name.clone(),
                dsfb_raw_boundary_points: feature.dsfb_raw_boundary_points,
                dsfb_persistent_boundary_points: feature.dsfb_persistent_boundary_points,
                dsfb_raw_violation_points: feature.dsfb_raw_violation_points,
                dsfb_persistent_violation_points: feature.dsfb_persistent_violation_points,
                ewma_alarm_points: feature.ewma_alarm_points,
                threshold_alarm_points: feature.threshold_alarm_points,
                pre_failure_run_hits: feature.pre_failure_run_hits,
                motif_precision_proxy: feature.motif_precision_proxy,
                recall_rescue_contribution: Some(recall_rescue_contribution),
                operator_burden_contribution: Some(operator_burden_contribution),
                semantic_persistence_contribution: Some(semantic_persistence_contribution),
                grouped_semantic_support: Some(grouped_semantic_support),
                violation_overdominance_penalty: Some(violation_overdominance_penalty),
                missing_fraction: feature.missing_fraction,
                z_pre_failure_run_hits: Some(z_pre_failure_run_hits),
                z_motif_precision_proxy: Some(z_motif_precision_proxy),
                z_recall_rescue_contribution: Some(z_recall_rescue_contribution),
                z_operator_burden_contribution: Some(z_operator_burden_contribution),
                z_semantic_persistence_contribution: Some(z_semantic_persistence_contribution),
                z_grouped_semantic_support: Some(z_grouped_semantic_support),
                z_violation_overdominance_penalty: Some(z_violation_overdominance_penalty),
                z_boundary,
                // Neither a plain violation term nor an EWMA term appears in
                // the DSFB-aware formula.
                z_violation: 0.0,
                z_ewma: 0.0,
                missingness_penalty,
                candidate_score,
                score_breakdown: format!(
                    "{:+.4} pre_failure + {:+.4} motif_precision + 0.5*{:+.4} recall_rescue + 0.5*{:+.4} semantic_persistence + 0.5*{:+.4} grouped_support + 0.25*{:+.4} boundary - {:+.4} burden - 0.5*{:+.4} violation_overdominance - {:.1} missingness",
                    z_pre_failure_run_hits,
                    z_motif_precision_proxy,
                    z_recall_rescue_contribution,
                    z_semantic_persistence_contribution,
                    z_grouped_semantic_support,
                    z_boundary,
                    z_operator_burden_contribution,
                    z_violation_overdominance_penalty,
                    missingness_penalty
                ),
                // Assigned after sorting.
                rank: 0,
            }
        })
        .collect::<Vec<_>>();

    ranking.sort_by(|left, right| {
        by_score_descending(left.candidate_score, right.candidate_score)
            .then_with(|| left.feature_name.cmp(&right.feature_name))
    });
    for (position, row) in ranking.iter_mut().enumerate() {
        row.rank = position + 1;
    }
    ranking
}
/// Writes a feature ranking to `path` as CSV with an explicit header row.
///
/// The header record is always written, followed by one record per entry of
/// `ranking` in slice order. Optional columns are rendered through
/// `format_option_csv`; the remaining floating-point columns use six decimal
/// places.
///
/// # Errors
/// Propagates any failure from opening `path`, writing a record, or flushing.
pub fn write_feature_ranking_csv(path: &Path, ranking: &[FeatureRankingRow]) -> Result<()> {
    // Fixed-precision rendering shared by the always-present float columns.
    fn six_decimals(value: impl std::fmt::Display) -> String {
        format!("{value:.6}")
    }

    let header = [
        "ranking_strategy",
        "rank",
        "feature_index",
        "feature_name",
        "ranking_formula",
        "dsfb_raw_boundary_points",
        "dsfb_persistent_boundary_points",
        "dsfb_raw_violation_points",
        "dsfb_persistent_violation_points",
        "ewma_alarm_points",
        "threshold_alarm_points",
        "pre_failure_run_hits",
        "motif_precision_proxy",
        "recall_rescue_contribution",
        "operator_burden_contribution",
        "semantic_persistence_contribution",
        "grouped_semantic_support",
        "violation_overdominance_penalty",
        "missing_fraction",
        "z_pre_failure_run_hits",
        "z_motif_precision_proxy",
        "z_recall_rescue_contribution",
        "z_operator_burden_contribution",
        "z_semantic_persistence_contribution",
        "z_grouped_semantic_support",
        "z_violation_overdominance_penalty",
        "z_boundary",
        "z_violation",
        "z_ewma",
        "missingness_penalty",
        "candidate_score",
        "score_breakdown",
    ];

    let mut csv_out = Writer::from_path(path)?;
    csv_out.write_record(header)?;

    for entry in ranking {
        // Column order must mirror `header` above.
        let record = [
            entry.ranking_strategy.clone(),
            entry.rank.to_string(),
            entry.feature_index.to_string(),
            entry.feature_name.clone(),
            entry.ranking_formula.clone(),
            entry.dsfb_raw_boundary_points.to_string(),
            entry.dsfb_persistent_boundary_points.to_string(),
            entry.dsfb_raw_violation_points.to_string(),
            entry.dsfb_persistent_violation_points.to_string(),
            entry.ewma_alarm_points.to_string(),
            entry.threshold_alarm_points.to_string(),
            entry.pre_failure_run_hits.to_string(),
            format_option_csv(entry.motif_precision_proxy),
            format_option_csv(entry.recall_rescue_contribution),
            format_option_csv(entry.operator_burden_contribution),
            format_option_csv(entry.semantic_persistence_contribution),
            format_option_csv(entry.grouped_semantic_support),
            format_option_csv(entry.violation_overdominance_penalty),
            six_decimals(entry.missing_fraction),
            format_option_csv(entry.z_pre_failure_run_hits),
            format_option_csv(entry.z_motif_precision_proxy),
            format_option_csv(entry.z_recall_rescue_contribution),
            format_option_csv(entry.z_operator_burden_contribution),
            format_option_csv(entry.z_semantic_persistence_contribution),
            format_option_csv(entry.z_grouped_semantic_support),
            format_option_csv(entry.z_violation_overdominance_penalty),
            six_decimals(entry.z_boundary),
            six_decimals(entry.z_violation),
            six_decimals(entry.z_ewma),
            six_decimals(entry.missingness_penalty),
            six_decimals(entry.candidate_score),
            entry.score_breakdown.clone(),
        ];
        csv_out.write_record(record)?;
    }

    csv_out.flush()?;
    Ok(())
}
pub fn compare_feature_rankings(
compression_ranking: &[FeatureRankingRow],
recall_aware_ranking: &[FeatureRankingRow],
burden_aware_ranking: &[FeatureRankingRow],
dsfb_aware_ranking: &[FeatureRankingRow],
) -> Vec<FeatureRankingComparisonRow> {
let compression_by_feature = compression_ranking
.iter()
.map(|row| (&row.feature_name, row))
.collect::<BTreeMap<_, _>>();
let recall_by_feature = recall_aware_ranking
.iter()
.map(|row| (&row.feature_name, row))
.collect::<BTreeMap<_, _>>();
let burden_by_feature = burden_aware_ranking
.iter()
.map(|row| (&row.feature_name, row))
.collect::<BTreeMap<_, _>>();
let dsfb_by_feature = dsfb_aware_ranking
.iter()
.map(|row| (&row.feature_name, row))
.collect::<BTreeMap<_, _>>();
let mut feature_names = compression_by_feature
.keys()
.copied()
.chain(recall_by_feature.keys().copied())
.chain(burden_by_feature.keys().copied())
.chain(dsfb_by_feature.keys().copied())
.collect::<Vec<_>>();
feature_names.sort_unstable();
feature_names.dedup();
feature_names
.into_iter()
.map(|feature_name| {
let compression = compression_by_feature.get(feature_name).copied();
let recall = recall_by_feature.get(feature_name).copied();
let burden = burden_by_feature.get(feature_name).copied();
let dsfb = dsfb_by_feature.get(feature_name).copied();
FeatureRankingComparisonRow {
feature_index: compression
.or(recall)
.or(burden)
.or(dsfb)
.map(|row| row.feature_index)
.unwrap_or_default(),
feature_name: feature_name.to_string(),
compression_rank: compression.map(|row| row.rank),
recall_aware_rank: recall.map(|row| row.rank),
burden_aware_rank: burden.map(|row| row.rank),
dsfb_aware_rank: dsfb.map(|row| row.rank),
compression_score: compression.map(|row| row.candidate_score),
recall_aware_score: recall.map(|row| row.candidate_score),
burden_aware_score: burden.map(|row| row.candidate_score),
dsfb_aware_score: dsfb.map(|row| row.candidate_score),
rank_delta_recall_minus_compression: match (compression, recall) {
(Some(compression), Some(recall)) => {
Some(recall.rank as i64 - compression.rank as i64)
}
_ => None,
},
rank_delta_burden_minus_compression: match (compression, burden) {
(Some(compression), Some(burden)) => {
Some(burden.rank as i64 - compression.rank as i64)
}
_ => None,
},
rank_delta_dsfb_aware_minus_compression: match (compression, dsfb) {
(Some(compression), Some(dsfb)) => {
Some(dsfb.rank as i64 - compression.rank as i64)
}
_ => None,
},
}
})
.collect()
}
/// Computes, per feature index, how much structural score sits in persistent
/// semantic streaks.
///
/// Steps:
/// 1. For every `(feature_index, run_index)` keep a single candidate from
///    `semantic_layer.ranked_candidates`: the one with the lowest `rank`, ties
///    going to the strictly higher `structural_score_proxy`.
/// 2. Per feature, walk those candidates in ascending run order and group them
///    into streaks of consecutive run indices that share the same heuristic
///    name.
/// 3. Sum the structural scores of every streak that spans at least two runs.
///
/// Features that have candidates but no qualifying streak map to `0.0`;
/// features without any candidate are absent from the result.
fn semantic_persistence_contribution_by_feature(
    semantic_layer: &SemanticLayer,
) -> BTreeMap<usize, f64> {
    // (feature, run) -> (heuristic name, structural score, rank) of the winner.
    let mut best_per_run = BTreeMap::<(usize, usize), (String, f64, usize)>::new();
    for candidate in semantic_layer.ranked_candidates.iter() {
        let key = (candidate.feature_index, candidate.run_index);
        match best_per_run.get_mut(&key) {
            Some(current) => {
                let outranks = candidate.rank < current.2
                    || (candidate.rank == current.2
                        && candidate.structural_score_proxy > current.1);
                if outranks {
                    *current = (
                        candidate.heuristic_name.clone(),
                        candidate.structural_score_proxy,
                        candidate.rank,
                    );
                }
            }
            None => {
                best_per_run.insert(
                    key,
                    (
                        candidate.heuristic_name.clone(),
                        candidate.structural_score_proxy,
                        candidate.rank,
                    ),
                );
            }
        }
    }

    // Regroup the per-run winners by feature.
    let mut runs_by_feature = BTreeMap::<usize, Vec<(usize, String, f64)>>::new();
    for ((feature_index, run_index), (heuristic_name, structural_score, _rank)) in best_per_run {
        runs_by_feature
            .entry(feature_index)
            .or_default()
            .push((run_index, heuristic_name, structural_score));
    }

    let mut contributions = BTreeMap::<usize, f64>::new();
    for (feature_index, mut runs) in runs_by_feature {
        runs.sort_by_key(|entry| entry.0);

        let mut total = 0.0;
        let mut streak_len = 0usize;
        let mut streak_score = 0.0;
        // Run index and heuristic name of the previously visited run.
        let mut previous: Option<(usize, String)> = None;

        for (run_index, heuristic_name, structural_score) in runs {
            let extends_streak = match &previous {
                Some((prev_run, prev_name)) => {
                    *prev_run + 1 == run_index && prev_name.as_str() == heuristic_name.as_str()
                }
                None => false,
            };

            if extends_streak {
                streak_len += 1;
                streak_score += structural_score;
            } else {
                // Close the finished streak; single-run streaks earn nothing.
                if streak_len >= 2 {
                    total += streak_score;
                }
                streak_len = 1;
                streak_score = structural_score;
            }
            previous = Some((run_index, heuristic_name));
        }

        // Account for the streak still open at the end of the sequence.
        if streak_len >= 2 {
            total += streak_score;
        }
        contributions.insert(feature_index, total);
    }
    contributions
}
/// Distributes each group semantic match's support across its member features.
///
/// For every entry of `scaffold_semiotics.group_semantic_matches` the
/// comma-separated `participating_features` list is split (empty segments are
/// dropped). The match's `structural_score_proxy` is divided by its rank
/// (clamped to at least 1) and then by the number of listed members; that
/// share is added to every member whose name resolves to a feature index via
/// `scaffold_semiotics.feature_signs`. Names that do not resolve are skipped
/// but still count towards the divisor.
fn grouped_semantic_support_by_feature(
    scaffold_semiotics: &ScaffoldSemioticsArtifacts,
) -> BTreeMap<usize, f64> {
    // Feature name -> feature index; a later duplicate name replaces an earlier one.
    let mut index_of = BTreeMap::new();
    for sign in scaffold_semiotics.feature_signs.iter() {
        index_of.insert(sign.feature_name.as_str(), sign.feature_index);
    }

    let mut contributions = BTreeMap::<usize, f64>::new();
    for group_match in &scaffold_semiotics.group_semantic_matches {
        let members: Vec<_> = group_match
            .participating_features
            .split(',')
            .filter(|name| !name.is_empty())
            .collect();
        if members.is_empty() {
            continue;
        }

        let per_feature_share = group_match.structural_score_proxy
            / group_match.rank.max(1) as f64
            / members.len() as f64;

        for name in members {
            if let Some(&feature_index) = index_of.get(name) {
                *contributions.entry(feature_index).or_default() += per_feature_share;
            }
        }
    }
    contributions
}
/// Serialises the operator delta-attainment rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_operator_delta_attainment_matrix_csv(
    path: &Path,
    rows: &[OperatorDeltaAttainmentRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the operator burden contribution rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_operator_burden_contributions_csv(
    path: &Path,
    rows: &[OperatorBurdenContributionRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the recall recovery efficiency rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_recall_recovery_efficiency_csv(
    path: &Path,
    rows: &[RecallRecoveryEfficiencyRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the single-change iteration log rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_single_change_iteration_log_csv(
    path: &Path,
    rows: &[SingleChangeIterationRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the feature ranking comparison rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_feature_ranking_comparison_csv(
    path: &Path,
    rows: &[FeatureRankingComparisonRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the feature policy summary rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_feature_policy_summary_csv(
    path: &Path,
    rows: &[FeaturePolicySummaryRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the recall rescue result rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_recall_rescue_results_csv(path: &Path, rows: &[RecallRescueResultRow]) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the missed-failure diagnostic rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_missed_failure_diagnostics_csv(
    path: &Path,
    rows: &[MissedFailureDiagnosticRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the recall-critical feature rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_recall_critical_features_csv(
    path: &Path,
    rows: &[RecallCriticalFeatureRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Serialises the policy contribution analysis rows to a CSV file at `path`.
///
/// Each element of `rows` is written in slice order with `Writer::serialize`,
/// after which the writer is flushed.
///
/// # Errors
/// Propagates any failure from opening `path`, serialising a row, or flushing.
pub fn write_policy_contribution_analysis_csv(
    path: &Path,
    rows: &[PolicyContributionAnalysisRow],
) -> Result<()> {
    let mut csv_out = Writer::from_path(path)?;
    rows.iter().try_for_each(|row| csv_out.serialize(row))?;
    csv_out.flush()?;
    Ok(())
}
/// Derives the `top_4`, `top_8`, `top_16` and `all_features` cohorts from a
/// ranking, together with a report on each entry of `SEED_FEATURES`.
///
/// Cohorts take rows from the front of `ranking` in the order given, so the
/// slice is expected to be sorted best-first already. The recorded ranking
/// formula is copied from the first row, or falls back to `RANKING_FORMULA`
/// when `ranking` is empty. A seed feature that does not appear in `ranking`
/// is reported as not found, with an explanatory note in every cohort slot.
pub fn build_feature_cohorts(ranking: &[FeatureRankingRow]) -> FeatureCohorts {
    // First `limit` ranked rows, each tagged with the cohort label.
    let leading = |limit: usize, label: &'static str| {
        ranking
            .iter()
            .take(limit)
            .map(|row| cohort_member(row, label))
            .collect::<Vec<_>>()
    };

    let ranking_formula = match ranking.first() {
        Some(top_row) => top_row.ranking_formula.clone(),
        None => RANKING_FORMULA.into(),
    };

    let top_4 = leading(4, "top_4");
    let top_8 = leading(8, "top_8");
    let top_16 = leading(16, "top_16");
    let all_features = leading(ranking.len(), "all_features");

    let seed_feature_report = SEED_FEATURES
        .iter()
        .map(|seed| {
            let Some(row) = ranking.iter().find(|row| row.feature_name == *seed) else {
                // Seed is absent from the ranking: same note for every cohort.
                let note =
                    "Excluded because the feature is not analyzable in the saved run metrics."
                        .to_string();
                return SeedFeatureReport {
                    feature_name: (*seed).to_string(),
                    found_in_ranking: false,
                    rank: None,
                    candidate_score: None,
                    in_top_4: false,
                    in_top_8: false,
                    in_top_16: false,
                    top_4_note: note.clone(),
                    top_8_note: note.clone(),
                    top_16_note: note,
                };
            };
            SeedFeatureReport {
                feature_name: (*seed).to_string(),
                found_in_ranking: true,
                rank: Some(row.rank),
                candidate_score: Some(row.candidate_score),
                in_top_4: row.rank <= 4,
                in_top_8: row.rank <= 8,
                in_top_16: row.rank <= 16,
                top_4_note: seed_membership_note(row, 4, "top_4"),
                top_8_note: seed_membership_note(row, 8, "top_8"),
                top_16_note: seed_membership_note(row, 16, "top_16"),
            }
        })
        .collect::<Vec<_>>();

    FeatureCohorts {
        ranking_formula,
        missingness_penalty_threshold: MISSINGNESS_PENALTY_THRESHOLD,
        missingness_penalty_value: MISSINGNESS_PENALTY_VALUE,
        top_4,
        top_8,
        top_16,
        all_features,
        seed_feature_report,
    }
}
/// Packages the seed-feature report of `cohorts` as a standalone artifact.
///
/// The artifact carries a copy of the cohorts' ranking formula, the names in
/// `SEED_FEATURES` as owned strings, and a copy of the per-seed report.
pub fn build_seed_feature_check(cohorts: &FeatureCohorts) -> SeedFeatureCheckArtifact {
    let ranking_formula = cohorts.ranking_formula.clone();
    let seed_feature_report = cohorts.seed_feature_report.clone();
    SeedFeatureCheckArtifact {
        ranking_formula,
        requested_seed_features: SEED_FEATURES
            .iter()
            .map(|&seed| seed.to_string())
            .collect(),
        seed_feature_report,
    }
}
/// Runs the cohort DSA grid with the default policy runtime.
///
/// Delegates to `run_cohort_dsa_grid_with_policy`, passing
/// `DsaPolicyRuntime::default()` and the `"compression_biased"` ranking
/// strategy label; every other argument is forwarded unchanged.
///
/// # Errors
/// Returns whatever error the delegated call produces.
pub fn run_cohort_dsa_grid(
    dataset: &PreparedDataset,
    nominal: &NominalModel,
    residuals: &ResidualSet,
    signs: &SignSet,
    baselines: &BaselineSet,
    grammar: &GrammarSet,
    cohorts: &FeatureCohorts,
    pre_failure_lookback_runs: usize,
    metrics: &BenchmarkMetrics,
) -> Result<CohortExecution> {
    let default_policy = DsaPolicyRuntime::default();
    run_cohort_dsa_grid_with_policy(
        dataset,
        nominal,
        residuals,
        signs,
        baselines,
        grammar,
        cohorts,
        pre_failure_lookback_runs,
        metrics,
        &default_policy,
        "compression_biased",
    )
}
/// Sweeps the DSA parameter grid over every feature cohort and selects a configuration.
///
/// For each `(window, persistence_runs, alert_tau)` combination drawn from
/// `DSA_WINDOW_SWEEP`, `DSA_PERSISTENCE_SWEEP` and `DSA_TAU_SWEEP`, one base
/// evaluation is computed with `evaluate_dsa_with_policy`. That evaluation is then
/// projected onto each non-empty cohort (`top_4`, `top_8`, `top_16`,
/// `all_features`) for every corroboration count in `CORROBORATION_SWEEP` that
/// does not exceed the cohort size, producing one grid row per projection.
///
/// The selected configuration is the best row flagged `primary_success`
/// (per `compare_successful_rows`), or otherwise the row returned by
/// `choose_closest_to_success`. A failure analysis is built only when no row
/// achieved primary success.
///
/// `ranking_strategy` is a label recorded on each grid row; `policy_runtime` is
/// forwarded to the base evaluation.
///
/// # Errors
/// Propagates errors from the evaluation, projection and rebuild helpers, and
/// returns `DsfbSemiconductorError::DatasetFormat` when the grid yields no
/// selectable row.
pub fn run_cohort_dsa_grid_with_policy(
dataset: &PreparedDataset,
nominal: &NominalModel,
residuals: &ResidualSet,
signs: &SignSet,
baselines: &BaselineSet,
grammar: &GrammarSet,
cohorts: &FeatureCohorts,
pre_failure_lookback_runs: usize,
metrics: &BenchmarkMetrics,
policy_runtime: &DsaPolicyRuntime,
ranking_strategy: &str,
) -> Result<CohortExecution> {
// Cohorts are visited in this fixed order for every grid point.
let cohort_specs = [
("top_4", cohorts.top_4.as_slice()),
("top_8", cohorts.top_8.as_slice()),
("top_16", cohorts.top_16.as_slice()),
("all_features", cohorts.all_features.as_slice()),
];
// Reference values from the benchmark summary, passed to the limiting-factor
// and failure-analysis helpers below.
let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;
let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
let mut grid_rows = Vec::new();
let mut motif_policy_rows = Vec::new();
// `feature_trace_config_id` advances once per (window, persistence, tau)
// combination; `grid_row_id` advances once per emitted grid row.
let mut feature_trace_config_id = 0usize;
let mut grid_row_id = 0usize;
for &window in DSA_WINDOW_SWEEP {
for &persistence_runs in DSA_PERSISTENCE_SWEEP {
for &alert_tau in DSA_TAU_SWEEP {
// The base evaluation uses a corroboration minimum of 1; the swept
// corroboration count is applied later via `project_dsa_to_cohort`.
let base_config = DsaConfig {
window,
persistence_runs,
alert_tau,
corroborating_feature_count_min: 1,
};
let base_evaluation = evaluate_dsa_with_policy(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&base_config,
pre_failure_lookback_runs,
policy_runtime,
)?;
for (cohort_name, members) in cohort_specs {
if members.is_empty() {
continue;
}
let feature_indices = members
.iter()
.map(|member| member.feature_index)
.collect::<Vec<_>>();
for &corroborating_m in CORROBORATION_SWEEP {
// Skip corroboration counts larger than the cohort itself.
if corroborating_m > feature_indices.len() {
continue;
}
let evaluation = project_dsa_to_cohort(
dataset,
nominal,
residuals,
baselines,
grammar,
&base_evaluation,
&feature_indices,
corroborating_m,
pre_failure_lookback_runs,
cohort_name,
)?;
let row = build_grid_row(
grid_row_id,
feature_trace_config_id,
ranking_strategy,
&cohorts.ranking_formula,
cohort_name,
members.len(),
&base_config,
corroborating_m,
dataset,
&evaluation,
metrics,
);
motif_policy_rows.extend(build_motif_policy_rows(&row, &evaluation));
grid_rows.push(row);
grid_row_id += 1;
}
}
feature_trace_config_id += 1;
}
}
}
let best_by_cohort = build_best_by_cohort(&grid_rows);
let closest_to_success = choose_closest_to_success(&grid_rows);
// Among rows flagged as primary successes, keep the minimum under
// `compare_successful_rows`.
let best_primary_success = grid_rows
.iter()
.filter(|row| row.primary_success)
.cloned()
.min_by(compare_successful_rows);
// Highest precursor quality across all rows, with `compare_successful_rows`
// as the secondary ordering.
let best_precursor_quality_row = grid_rows.iter().cloned().max_by(|left, right| {
compare_option_f64(left.precursor_quality, right.precursor_quality)
.then_with(|| compare_successful_rows(left, right))
});
let any_primary_success = best_primary_success.is_some();
// Prefer a primary success; otherwise fall back to the closest-to-success row.
let selected_configuration = best_primary_success
.clone()
.or_else(|| closest_to_success.clone());
let best_cohort = selected_configuration.as_ref().map(row_label);
let corroboration_effect = corroboration_effect(&grid_rows);
let limiting_factor = limiting_factor_from_row(
selected_configuration.as_ref(),
ewma_nuisance,
threshold_recall,
);
// Failure analysis is only produced when no grid row achieved primary success.
let failure_analysis = if any_primary_success {
None
} else {
build_failure_analysis(
&grid_rows,
&motif_policy_rows,
cohorts,
ewma_nuisance,
threshold_recall,
selected_configuration.as_ref(),
&corroboration_effect,
&limiting_factor,
)
};
let summary = CohortDsaSummary {
ranking_formula: cohorts.ranking_formula.clone(),
primary_success_condition: primary_success_condition(),
recall_tolerance_runs: RECALL_TOLERANCE,
cohort_results: grid_rows.clone(),
best_by_cohort,
closest_to_success: closest_to_success.clone(),
best_primary_success: best_primary_success.clone(),
best_precursor_quality_row: best_precursor_quality_row.clone(),
selected_configuration: selected_configuration.clone(),
best_cohort,
any_primary_success,
failure_analysis,
grid_point_count: grid_rows.len(),
optimization_priority_order: optimization_priority_order(),
cross_feature_corroboration_effect: corroboration_effect.clone(),
limiting_factor: limiting_factor.clone(),
};
let grid_summary = CohortGridSummary {
ranking_formula: cohorts.ranking_formula.clone(),
primary_success_condition_definition: primary_success_condition(),
recall_tolerance_runs: RECALL_TOLERANCE,
grid_point_count: grid_rows.len(),
optimization_priority_order: optimization_priority_order(),
success_row_count: grid_rows.iter().filter(|row| row.primary_success).count(),
any_success_row: any_primary_success,
closest_to_success: closest_to_success.clone(),
best_success_row: best_primary_success.clone(),
best_precursor_quality_row: best_precursor_quality_row,
cross_feature_corroboration_effect: corroboration_effect,
limiting_factor,
};
// Without a selected row there is nothing to rebuild, so report an error.
let selected_row = selected_configuration.ok_or_else(|| {
DsfbSemiconductorError::DatasetFormat("cohort grid produced no selectable row".into())
})?;
// NOTE(review): `policy_runtime` is not passed to `rebuild_selected_evaluation`
// here — confirm this is intended when a non-default policy is supplied.
let selected_evaluation = rebuild_selected_evaluation(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
cohorts,
pre_failure_lookback_runs,
&selected_row,
)?;
Ok(CohortExecution {
grid_summary,
summary,
motif_policy_contributions: motif_policy_rows,
selected_evaluation,
})
}
pub fn run_recall_optimization(
dataset: &PreparedDataset,
nominal: &NominalModel,
residuals: &ResidualSet,
signs: &SignSet,
baselines: &BaselineSet,
grammar: &GrammarSet,
metrics: &BenchmarkMetrics,
semantic_layer: &SemanticLayer,
scaffold_semiotics: &ScaffoldSemioticsArtifacts,
pre_failure_lookback_runs: usize,
) -> Result<OptimizationExecution> {
let baseline_feature_ranking = compute_feature_ranking(metrics);
let baseline_feature_cohorts = build_feature_cohorts(&baseline_feature_ranking);
let baseline_execution = run_cohort_dsa_grid(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&baseline_feature_cohorts,
pre_failure_lookback_runs,
metrics,
)?;
let recall_rescue_contributions =
recall_rescue_contribution_by_feature(&baseline_execution.selected_evaluation);
let operator_burden_contributions =
operator_burden_contribution_by_feature(dataset, &baseline_execution.selected_evaluation);
let recall_aware_feature_ranking =
compute_feature_ranking_recall_aware(metrics, &recall_rescue_contributions);
let burden_aware_feature_ranking = compute_feature_ranking_burden_aware(
metrics,
&recall_rescue_contributions,
&operator_burden_contributions,
);
let dsfb_aware_feature_ranking = compute_feature_ranking_dsfb_aware(
metrics,
&recall_rescue_contributions,
&operator_burden_contributions,
semantic_layer,
scaffold_semiotics,
);
let ranking_comparison = compare_feature_rankings(
&baseline_feature_ranking,
&recall_aware_feature_ranking,
&burden_aware_feature_ranking,
&dsfb_aware_feature_ranking,
);
let recall_aware_feature_cohorts = build_feature_cohorts(&recall_aware_feature_ranking);
let burden_aware_feature_cohorts = build_feature_cohorts(&burden_aware_feature_ranking);
let dsfb_aware_feature_cohorts = build_feature_cohorts(&dsfb_aware_feature_ranking);
let (feature_policy_overrides, single_change_iteration_log) = build_feature_policy_overrides(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
metrics,
baseline_execution
.summary
.selected_configuration
.as_ref()
.unwrap_or_else(|| {
panic!("baseline cohort execution must provide a selected configuration")
}),
&baseline_execution.selected_evaluation,
&recall_aware_feature_ranking,
semantic_layer,
pre_failure_lookback_runs,
)?;
let feature_policy_summary = build_feature_policy_summary(
metrics,
&baseline_feature_ranking,
&recall_aware_feature_ranking,
&burden_aware_feature_ranking,
&dsfb_aware_feature_ranking,
&feature_policy_overrides,
);
let policy_runtime = DsaPolicyRuntime {
feature_policy_overrides: feature_policy_overrides.clone(),
recall_rescue: RecallRescueConfig {
enabled: true,
..RecallRescueConfig::default()
},
semantic_rescue_support: build_semantic_rescue_support(
semantic_layer,
dataset.labels.len(),
),
};
let optimized_compression_execution = run_cohort_dsa_grid_with_policy(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&baseline_feature_cohorts,
pre_failure_lookback_runs,
metrics,
&policy_runtime,
"compression_biased",
)?;
let recall_aware_execution = run_cohort_dsa_grid_with_policy(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&recall_aware_feature_cohorts,
pre_failure_lookback_runs,
metrics,
&policy_runtime,
"recall_aware",
)?;
let burden_aware_execution = run_cohort_dsa_grid_with_policy(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&burden_aware_feature_cohorts,
pre_failure_lookback_runs,
metrics,
&policy_runtime,
"burden_aware",
)?;
let dsfb_aware_execution = run_cohort_dsa_grid_with_policy(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&dsfb_aware_feature_cohorts,
pre_failure_lookback_runs,
metrics,
&policy_runtime,
"dsfb_aware",
)?;
let mut union_rows = optimized_compression_execution
.summary
.cohort_results
.clone();
union_rows.extend(recall_aware_execution.summary.cohort_results.clone());
union_rows.extend(burden_aware_execution.summary.cohort_results.clone());
union_rows.extend(dsfb_aware_execution.summary.cohort_results.clone());
let operator_baselines =
build_operator_baselines(dataset, grammar, &baseline_execution.selected_evaluation);
let current_policy_dsa_nuisance = baseline_execution
.summary
.selected_configuration
.as_ref()
.map(|row| row.pass_run_nuisance_proxy)
.unwrap_or(
metrics
.summary
.pass_run_dsfb_persistent_boundary_nuisance_rate,
);
let pareto_frontier = pareto_frontier(&union_rows);
let stage_a_candidates = stage_a_candidates(
&union_rows,
metrics.summary.pass_run_dsfb_raw_boundary_nuisance_rate,
current_policy_dsa_nuisance,
);
let stage_b_candidates = stage_b_candidates(
&stage_a_candidates,
metrics.summary.pass_run_ewma_nuisance_rate,
current_policy_dsa_nuisance,
);
let stage1_candidates = stage1_candidates(&union_rows, &operator_baselines);
let stage2_candidates = stage2_candidates(&stage1_candidates, &operator_baselines);
let selected_row = stage2_candidates
.first()
.cloned()
.or_else(|| stage1_candidates.first().cloned())
.or_else(|| {
choose_optimized_row(
&stage_b_candidates,
&union_rows,
metrics.summary.pass_run_ewma_nuisance_rate,
metrics.summary.failure_runs_with_preceding_threshold_signal,
current_policy_dsa_nuisance,
)
})
.ok_or_else(|| {
DsfbSemiconductorError::DatasetFormat(
"optimized search produced no selectable configuration".into(),
)
})?;
let selected_evaluation = rebuild_selected_evaluation_with_policy(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
&baseline_feature_cohorts,
&recall_aware_feature_cohorts,
&burden_aware_feature_cohorts,
&dsfb_aware_feature_cohorts,
pre_failure_lookback_runs,
&selected_row,
&policy_runtime,
)?;
let mut optimized_execution = match selected_row.ranking_strategy.as_str() {
"recall_aware" => recall_aware_execution.clone(),
"burden_aware" => burden_aware_execution.clone(),
"dsfb_aware" => dsfb_aware_execution.clone(),
_ => optimized_compression_execution.clone(),
};
optimized_execution.selected_evaluation = selected_evaluation.clone();
optimized_execution.summary.selected_configuration = Some(selected_row.clone());
let recall_rescue_results = union_rows
.iter()
.map(|row| RecallRescueResultRow {
ranking_strategy: row.ranking_strategy.clone(),
cohort_name: row.cohort_name.clone(),
window: row.window,
persistence_runs: row.persistence_runs,
alert_tau: row.alert_tau,
corroborating_m: row.corroborating_m,
failure_recall: row.failure_recall,
pass_run_nuisance_proxy: row.pass_run_nuisance_proxy,
rescued_point_count: row.rescued_point_count,
rescued_watch_to_review_points: row.rescued_watch_to_review_points,
rescued_review_to_escalate_points: row.rescued_review_to_escalate_points,
})
.collect::<Vec<_>>();
let missed_failure_diagnostics = build_missed_failure_diagnostics(
&baseline_execution.selected_evaluation,
&selected_evaluation,
&feature_policy_overrides,
);
let recall_critical_features = build_recall_critical_features(
&baseline_execution.selected_evaluation,
&selected_evaluation,
&baseline_feature_ranking,
&recall_aware_feature_ranking,
&feature_policy_overrides,
&recall_rescue_contributions,
);
let policy_contribution_analysis = build_policy_contribution_analysis(
&baseline_execution.selected_evaluation,
&selected_evaluation,
&selected_row,
);
let operator_delta_targets = compute_operator_delta_targets(
&selected_row,
&selected_evaluation,
&operator_baselines,
metrics,
);
let operator_delta_attainment_matrix = build_operator_delta_attainment_matrix(
&selected_row,
&stage1_candidates,
&stage2_candidates,
&operator_baselines,
metrics,
);
let policy_operator_burden_contributions = build_policy_operator_burden_contributions(
dataset,
&baseline_execution.selected_evaluation,
&selected_evaluation,
&selected_row,
);
let recall_recovery_efficiency = build_recall_recovery_efficiency(
dataset,
&baseline_execution.selected_evaluation,
&selected_evaluation,
pre_failure_lookback_runs,
);
let delta_target_assessment = compute_delta_target_assessment(
&selected_row,
&stage_a_candidates,
&union_rows,
baseline_execution
.summary
.selected_configuration
.as_ref()
.unwrap_or_else(|| {
panic!("baseline cohort execution must provide a selected configuration")
}),
metrics,
);
Ok(OptimizationExecution {
baseline_feature_ranking,
baseline_feature_cohorts,
baseline_execution,
recall_aware_feature_ranking,
burden_aware_feature_ranking,
dsfb_aware_feature_ranking,
ranking_comparison,
recall_aware_feature_cohorts,
burden_aware_feature_cohorts,
dsfb_aware_feature_cohorts,
feature_policy_overrides,
feature_policy_summary,
optimized_execution,
recall_aware_execution,
burden_aware_execution,
dsfb_aware_execution,
pareto_frontier,
stage_a_candidates,
stage_b_candidates,
stage1_candidates,
stage2_candidates,
recall_rescue_results,
missed_failure_diagnostics,
recall_critical_features,
policy_contribution_analysis,
operator_baselines,
operator_delta_targets,
operator_delta_attainment_matrix,
policy_operator_burden_contributions,
recall_recovery_efficiency,
single_change_iteration_log,
delta_target_assessment,
})
}
/// Aggregated evidence that a feature could support recovery of failures the
/// baseline DSA evaluation missed.
///
/// Instances are accumulated by `failure_local_support_candidates`, one per
/// feature, across every missed failure run for which the feature ranked among
/// the top supporting features.
#[derive(Debug, Clone, Default)]
struct FailureSupportCandidate {
/// Name of the feature, copied from its DSA trace.
feature_name: String,
/// Number of missed failure runs for which this feature was kept as a supporter.
support_failure_count: usize,
/// Largest DSA score observed inside any of the supporting lookback windows.
max_score: f64,
/// Largest `boundary_density_w` value observed inside those windows.
max_boundary_density: f64,
/// Largest `ewma_occupancy_w` value observed inside those windows.
max_ewma_occupancy: f64,
/// Largest `motif_recurrence_w` value observed inside those windows.
max_motif_recurrence: f64,
/// Pass-run alert-point count looked up for the feature (0 when absent from the burden map).
pass_review_burden: usize,
}
/// Builds, per feature, a run-indexed mask of where a strong semantic rescue
/// heuristic matched.
///
/// Only matches whose heuristic passes `is_strong_semantic_rescue_heuristic`
/// are considered. Each feature that has at least one in-range match gets a
/// `Vec<bool>` of length `run_count` with `true` at every matching run index.
///
/// Matches whose `run_index` falls outside `0..run_count` are skipped instead of
/// panicking: callers may derive `run_count` from evaluation traces (which can
/// be empty, yielding `run_count == 0`) while the semantic layer still carries
/// matches for the full dataset.
fn build_semantic_rescue_support(
    semantic_layer: &SemanticLayer,
    run_count: usize,
) -> BTreeMap<usize, Vec<bool>> {
    let mut support = BTreeMap::<usize, Vec<bool>>::new();
    for row in &semantic_layer.semantic_matches {
        if !is_strong_semantic_rescue_heuristic(&row.heuristic_name) {
            continue;
        }
        // A match beyond the mask length cannot be represented; ignore it
        // rather than indexing out of bounds.
        if row.run_index >= run_count {
            continue;
        }
        support
            .entry(row.feature_index)
            .or_insert_with(|| vec![false; run_count])[row.run_index] = true;
    }
    support
}
fn is_strong_semantic_rescue_heuristic(heuristic_name: &str) -> bool {
matches!(
heuristic_name,
PERSISTENT_INSTABILITY_CLUSTER
| PRE_FAILURE_SLOW_DRIFT
| RECURRENT_BOUNDARY_APPROACH
| TRANSITION_EXCURSION
)
}
/// Counts, per feature trace, how many DSA alert points land on pass runs and
/// how many land inside pre-failure lookback windows.
///
/// Returns `(pass_review_burden, pre_failure_review_burden)`, both keyed by
/// feature index. A run counts as a pass run when its label is `-1`; the
/// pre-failure windows come from `build_failure_window_mask` applied to every
/// run labelled `1`.
fn feature_review_burden_maps(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
    pre_failure_lookback_runs: usize,
) -> (BTreeMap<usize, usize>, BTreeMap<usize, usize>) {
    let mut failure_indices = Vec::new();
    for (run_index, label) in dataset.labels.iter().enumerate() {
        if *label == 1 {
            failure_indices.push(run_index);
        }
    }
    let in_failure_window = build_failure_window_mask(
        dataset.labels.len(),
        &failure_indices,
        pre_failure_lookback_runs,
    );

    let mut pass_burden = BTreeMap::new();
    let mut pre_failure_burden = BTreeMap::new();
    for trace in &evaluation.traces {
        let mut pass_points = 0usize;
        for (run_index, &alerted) in trace.dsa_alert.iter().enumerate() {
            let is_pass_run = dataset.labels[run_index] == -1;
            if is_pass_run && alerted {
                pass_points += 1;
            }
        }
        let mut pre_failure_points = 0usize;
        for (run_index, &alerted) in trace.dsa_alert.iter().enumerate() {
            let is_pre_failure = in_failure_window[run_index];
            if is_pre_failure && alerted {
                pre_failure_points += 1;
            }
        }
        pass_burden.insert(trace.feature_index, pass_points);
        pre_failure_burden.insert(trace.feature_index, pre_failure_points);
    }
    (pass_burden, pre_failure_burden)
}
/// Marks every run that lies inside the lookback window preceding a failure.
///
/// For each failure index `f`, the half-open range
/// `f.saturating_sub(pre_failure_lookback_runs)..f` is set to `true`; the
/// failure run itself is not marked by its own window. The returned mask has
/// `len` entries.
///
/// # Panics
///
/// Panics if any failure index exceeds `len`.
fn build_failure_window_mask(
    len: usize,
    failure_indices: &[usize],
    pre_failure_lookback_runs: usize,
) -> Vec<bool> {
    let mut mask = vec![false; len];
    failure_indices.iter().for_each(|&failure_index| {
        let window_start = failure_index.saturating_sub(pre_failure_lookback_runs);
        mask[window_start..failure_index].fill(true);
    });
    mask
}
/// Finds features that show semantically supported, near-threshold structure
/// in the lookback window of failures the baseline evaluation missed.
///
/// For every failure signal without an `earliest_dsa_run`, each trace is kept
/// only if, inside the window `[failure - lookback, failure)`:
/// - it has at least one strong semantic match,
/// - its peak DSA score reaches `max(alert_tau - 1.0, 0.0)`,
/// - its pass-run review burden does not exceed 12.
///
/// Surviving traces are ranked by
/// `max_score + 0.5 * motif + 0.25 * boundary + 0.25 * ewma` (descending, ties
/// by feature name) and the top three are folded into the per-feature result.
fn failure_local_support_candidates(
    baseline_selected_row: &CohortGridResult,
    baseline_evaluation: &DsaEvaluation,
    semantic_layer: &SemanticLayer,
    pre_failure_lookback_runs: usize,
    pass_review_burden_by_feature: &BTreeMap<usize, usize>,
) -> BTreeMap<usize, FailureSupportCandidate> {
    /// Evidence one feature contributes inside a single missed failure's window.
    struct WindowEvidence {
        feature_index: usize,
        feature_name: String,
        max_score: f64,
        max_boundary_density: f64,
        max_ewma_occupancy: f64,
        max_motif_recurrence: f64,
        pass_review_burden: usize,
        support_score: f64,
    }

    /// Largest value in `values`, floored at zero.
    fn window_peak(values: &[f64]) -> f64 {
        values.iter().copied().fold(0.0, f64::max)
    }

    let run_count = baseline_evaluation
        .traces
        .first()
        .map_or(0, |trace| trace.dsa_score.len());
    let semantic_support = build_semantic_rescue_support(semantic_layer, run_count);

    // Keyed by feature index so iteration order is deterministic.
    let mut traces_by_feature = BTreeMap::new();
    for trace in &baseline_evaluation.traces {
        traces_by_feature.insert(trace.feature_index, trace);
    }

    let mut candidates = BTreeMap::<usize, FailureSupportCandidate>::new();
    for signal in &baseline_evaluation.per_failure_run_signals {
        // Only failures the baseline never alerted on are of interest.
        if signal.earliest_dsa_run.is_some() {
            continue;
        }
        let failure_index = signal.failure_run_index;
        let start = failure_index.saturating_sub(pre_failure_lookback_runs);

        let mut ranked: Vec<WindowEvidence> = Vec::new();
        for trace in traces_by_feature.values() {
            let semantic_hits = semantic_support
                .get(&trace.feature_index)
                .map_or(0, |flags| {
                    flags[start..failure_index]
                        .iter()
                        .filter(|&&hit| hit)
                        .count()
                });
            if semantic_hits == 0 {
                continue;
            }
            let max_score = window_peak(&trace.dsa_score[start..failure_index]);
            if max_score < (baseline_selected_row.alert_tau - 1.0).max(0.0) {
                continue;
            }
            let pass_review_burden = pass_review_burden_by_feature
                .get(&trace.feature_index)
                .copied()
                .unwrap_or(0);
            if pass_review_burden > 12 {
                continue;
            }
            let max_boundary_density =
                window_peak(&trace.boundary_density_w[start..failure_index]);
            let max_ewma_occupancy = window_peak(&trace.ewma_occupancy_w[start..failure_index]);
            let max_motif_recurrence =
                window_peak(&trace.motif_recurrence_w[start..failure_index]);
            let support_score = max_score
                + 0.5 * max_motif_recurrence
                + 0.25 * max_boundary_density
                + 0.25 * max_ewma_occupancy;
            ranked.push(WindowEvidence {
                feature_index: trace.feature_index,
                feature_name: trace.feature_name.clone(),
                max_score,
                max_boundary_density,
                max_ewma_occupancy,
                max_motif_recurrence,
                pass_review_burden,
                support_score,
            });
        }

        // Highest support score first; feature name breaks ties.
        ranked.sort_by(|left, right| {
            right
                .support_score
                .partial_cmp(&left.support_score)
                .unwrap_or(Ordering::Equal)
                .then_with(|| left.feature_name.cmp(&right.feature_name))
        });

        for evidence in ranked.into_iter().take(3) {
            let entry = candidates.entry(evidence.feature_index).or_default();
            entry.feature_name = evidence.feature_name;
            entry.support_failure_count += 1;
            entry.max_score = entry.max_score.max(evidence.max_score);
            entry.max_boundary_density = entry
                .max_boundary_density
                .max(evidence.max_boundary_density);
            entry.max_ewma_occupancy = entry.max_ewma_occupancy.max(evidence.max_ewma_occupancy);
            entry.max_motif_recurrence = entry
                .max_motif_recurrence
                .max(evidence.max_motif_recurrence);
            entry.pass_review_burden = evidence.pass_review_burden;
        }
    }
    candidates
}
fn build_feature_policy_overrides(
dataset: &PreparedDataset,
nominal: &NominalModel,
residuals: &ResidualSet,
signs: &SignSet,
baselines: &BaselineSet,
grammar: &GrammarSet,
metrics: &BenchmarkMetrics,
baseline_selected_row: &CohortGridResult,
baseline_evaluation: &DsaEvaluation,
recall_aware_ranking: &[FeatureRankingRow],
semantic_layer: &SemanticLayer,
pre_failure_lookback_runs: usize,
) -> Result<(Vec<FeaturePolicyOverride>, Vec<SingleChangeIterationRow>)> {
let feature_metrics = metrics
.feature_metrics
.iter()
.map(|feature| (feature.feature_index, feature))
.collect::<BTreeMap<_, _>>();
let recall_rank_by_feature = recall_aware_ranking
.iter()
.map(|row| (row.feature_index, row))
.collect::<BTreeMap<_, _>>();
let (pass_review_burden_by_feature, pre_failure_review_burden_by_feature) =
feature_review_burden_maps(dataset, baseline_evaluation, pre_failure_lookback_runs);
let support_candidates = failure_local_support_candidates(
baseline_selected_row,
baseline_evaluation,
semantic_layer,
pre_failure_lookback_runs,
&pass_review_burden_by_feature,
);
let mut missed_feature_stats = BTreeMap::<usize, (String, usize, f64)>::new();
for signal in baseline_evaluation
.per_failure_run_signals
.iter()
.filter(|signal| signal.earliest_dsa_run.is_none())
{
let Some(feature_index) = signal.max_dsa_score_feature_index else {
continue;
};
let Some(feature_name) = signal.max_dsa_score_feature_name.as_ref() else {
continue;
};
let score = signal.max_dsa_score_in_lookback.unwrap_or(0.0);
let entry = missed_feature_stats
.entry(feature_index)
.or_insert_with(|| (feature_name.clone(), 0, 0.0));
entry.1 += 1;
entry.2 = entry.2.max(score);
}
let mut overrides = missed_feature_stats
.into_iter()
.filter_map(|(feature_index, (feature_name, miss_count, max_score))| {
let feature_metric = feature_metrics.get(&feature_index)?;
let recall_rank = recall_rank_by_feature.get(&feature_index).map(|row| row.rank);
let max_score_floor = baseline_selected_row.alert_tau - 0.40;
if max_score < max_score_floor
|| feature_metric.missing_fraction > OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS
|| feature_metric.pre_failure_run_hits == 0
|| feature_metric.motif_precision_proxy.unwrap_or(0.0) <= 0.0
{
return None;
}
let rescue_priority =
if miss_count >= 2 || max_score >= baseline_selected_row.alert_tau - 0.10 {
2
} else {
1
};
let fragmentation_override =
if feature_metric.motif_precision_proxy.unwrap_or(0.0) >= 0.70
&& max_score >= baseline_selected_row.alert_tau - 0.10
{
1.0
} else {
OPTIMIZATION_RESCUE_FRAGMENTATION
};
Some(FeaturePolicyOverride {
feature_index,
feature_name: feature_name.clone(),
alert_class_override: None,
requires_persistence_override: Some(false),
requires_corroboration_override: Some(false),
minimum_window_override: Some(OPTIMIZATION_RESCUE_WINDOW),
minimum_hits_override: Some(OPTIMIZATION_RESCUE_MIN_HITS),
maximum_allowed_fragmentation_override: Some(fragmentation_override),
rescue_eligible: true,
rescue_priority,
allow_watch_only: Some(false),
allow_review_without_escalate: Some(true),
suppress_if_isolated: Some(false),
override_reason: format!(
"Feature was the nearest current-DSA miss on {} failure run(s), max near-miss score {:.4}, recall-aware rank {}, pre_failure_run_hits={}, motif_precision_proxy={}, rescue_fragmentation_ceiling={:.2}.",
miss_count,
max_score,
recall_rank
.map(|rank| rank.to_string())
.unwrap_or_else(|| "n/a".into()),
feature_metric.pre_failure_run_hits,
format_option_f64(feature_metric.motif_precision_proxy),
fragmentation_override,
),
})
})
.collect::<Vec<_>>();
let existing_override_features = overrides
.iter()
.map(|override_entry| override_entry.feature_index)
.collect::<BTreeSet<_>>();
let mut support_overrides = support_candidates
.into_iter()
.filter(|(feature_index, _)| !existing_override_features.contains(feature_index))
.filter_map(|(feature_index, candidate)| {
let feature_metric = feature_metrics.get(&feature_index)?;
let recall_rank = recall_rank_by_feature.get(&feature_index).map(|row| row.rank);
if feature_metric.missing_fraction > OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS
|| feature_metric.pre_failure_run_hits == 0
|| feature_metric.motif_precision_proxy.unwrap_or(0.0) < 0.45
{
return None;
}
Some(FeaturePolicyOverride {
feature_index,
feature_name: candidate.feature_name.clone(),
alert_class_override: Some(HeuristicAlertClass::Watch),
requires_persistence_override: Some(false),
requires_corroboration_override: Some(false),
minimum_window_override: Some(2),
minimum_hits_override: Some(1),
maximum_allowed_fragmentation_override: Some(1.0),
rescue_eligible: true,
rescue_priority: 3,
allow_watch_only: Some(false),
allow_review_without_escalate: Some(true),
suppress_if_isolated: Some(false),
override_reason: format!(
"Feature is a low-burden grammar-qualified support candidate for {} missed failure run(s), max near-miss score {:.4}, max_boundary_density={:.2}, max_ewma_occupancy={:.2}, max_motif_recurrence={:.2}, recall-aware rank {}, pass_review_burden={}.",
candidate.support_failure_count,
candidate.max_score,
candidate.max_boundary_density,
candidate.max_ewma_occupancy,
candidate.max_motif_recurrence,
recall_rank
.map(|rank| rank.to_string())
.unwrap_or_else(|| "n/a".into()),
candidate.pass_review_burden,
),
})
})
.collect::<Vec<_>>();
let protected_features = overrides
.iter()
.chain(&support_overrides)
.map(|override_entry| override_entry.feature_index)
.collect::<BTreeSet<_>>();
let mut nuisance_overrides = pass_review_burden_by_feature
.iter()
.filter(|(feature_index, _)| !protected_features.contains(feature_index))
.filter_map(|(&feature_index, &pass_review_burden)| {
let feature_metric = feature_metrics.get(&feature_index)?;
let pre_failure_review_burden = pre_failure_review_burden_by_feature
.get(&feature_index)
.copied()
.unwrap_or(0);
if pass_review_burden < 300
|| pre_failure_review_burden.saturating_mul(20) > pass_review_burden
|| feature_metric.missing_fraction > OPTIMIZATION_OVERRIDE_MAX_MISSINGNESS
{
return None;
}
Some((
pass_review_burden,
pre_failure_review_burden,
FeaturePolicyOverride {
feature_index,
feature_name: feature_metric.feature_name.clone(),
alert_class_override: Some(HeuristicAlertClass::Watch),
requires_persistence_override: Some(true),
requires_corroboration_override: Some(true),
minimum_window_override: Some(OPTIMIZATION_RESCUE_WINDOW),
minimum_hits_override: Some(OPTIMIZATION_RESCUE_MIN_HITS),
maximum_allowed_fragmentation_override: Some(
OPTIMIZATION_RESCUE_FRAGMENTATION,
),
rescue_eligible: false,
rescue_priority: 0,
allow_watch_only: Some(true),
allow_review_without_escalate: Some(false),
suppress_if_isolated: Some(true),
override_reason: format!(
"Feature dominates pass-run burden ({}) but contributes only {} pre-failure Review/Escalate points inside the fixed lookback windows; clamp to Watch and suppress if isolated.",
pass_review_burden, pre_failure_review_burden,
),
},
))
})
.collect::<Vec<_>>();
nuisance_overrides.sort_by(|left, right| {
right
.0
.cmp(&left.0)
.then_with(|| left.1.cmp(&right.1))
.then_with(|| left.2.feature_name.cmp(&right.2.feature_name))
});
let mut nuisance_overrides = nuisance_overrides
.into_iter()
.take(MAX_FAILURE_DRIVEN_NUISANCE_OVERRIDES)
.map(|(_, _, override_entry)| override_entry)
.collect::<Vec<_>>();
let mut iteration = 1usize;
let mut current_overrides = Vec::new();
let mut current_evaluation = baseline_evaluation.clone();
let mut iteration_log = Vec::new();
overrides.sort_by(|left, right| left.feature_name.cmp(&right.feature_name));
support_overrides.sort_by(|left, right| {
right
.rescue_priority
.cmp(&left.rescue_priority)
.then_with(|| left.feature_name.cmp(&right.feature_name))
});
for candidate in overrides.into_iter().chain(support_overrides.into_iter()) {
let mut candidate_overrides = current_overrides.clone();
candidate_overrides.push(candidate.clone());
let candidate_evaluation = evaluate_selected_row_with_overrides(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
baseline_selected_row,
baseline_evaluation,
semantic_layer,
pre_failure_lookback_runs,
&candidate_overrides,
)?;
let affected_failures =
newly_recovered_failures(¤t_evaluation, &candidate_evaluation);
let accepted = !affected_failures.is_empty();
iteration_log.push(single_change_iteration_row(
iteration,
"rescue_rule",
&candidate.feature_name,
candidate.override_reason.clone(),
&affected_failures,
"",
accepted,
¤t_evaluation,
&candidate_evaluation,
));
iteration += 1;
if accepted {
current_overrides = candidate_overrides;
current_evaluation = candidate_evaluation;
}
}
let mut protected_features = current_overrides
.iter()
.map(|override_entry| override_entry.feature_index)
.collect::<BTreeSet<_>>();
let isolated_nuisance_overrides =
build_isolated_nuisance_overrides(dataset, ¤t_evaluation, &protected_features);
nuisance_overrides.extend(isolated_nuisance_overrides);
nuisance_overrides.sort_by(|left, right| left.feature_name.cmp(&right.feature_name));
for candidate in nuisance_overrides {
if protected_features.contains(&candidate.feature_index) {
continue;
}
let mut candidate_overrides = current_overrides.clone();
candidate_overrides.push(candidate.clone());
let candidate_evaluation = evaluate_selected_row_with_overrides(
dataset,
nominal,
residuals,
signs,
baselines,
grammar,
baseline_selected_row,
baseline_evaluation,
semantic_layer,
pre_failure_lookback_runs,
&candidate_overrides,
)?;
let accepted = candidate_evaluation.summary.failure_run_recall
>= current_evaluation.summary.failure_run_recall
&& (candidate_evaluation.summary.alert_point_count
< current_evaluation.summary.alert_point_count
|| candidate_evaluation.episode_summary.dsa_episode_count
< current_evaluation.episode_summary.dsa_episode_count
|| compare_option_gt(
candidate_evaluation.episode_summary.precursor_quality,
current_evaluation.episode_summary.precursor_quality,
) == Some(true));
iteration_log.push(single_change_iteration_row(
iteration,
"policy_constraint",
&candidate.feature_name,
candidate.override_reason.clone(),
&[],
"isolated_pass_only_episode",
accepted,
¤t_evaluation,
&candidate_evaluation,
));
iteration += 1;
if accepted {
protected_features.insert(candidate.feature_index);
current_overrides = candidate_overrides;
current_evaluation = candidate_evaluation;
}
}
current_overrides.sort_by(|left, right| {
right
.rescue_priority
.cmp(&left.rescue_priority)
.then_with(|| left.feature_name.cmp(&right.feature_name))
});
Ok((current_overrides, iteration_log))
}
/// Re-runs the baseline's selected DSA configuration with the given feature
/// policy overrides and projects the result back onto the baseline cohort.
///
/// The DSA configuration reuses the selected row's `window`,
/// `persistence_runs`, and `alert_tau`, with a corroboration minimum of 1; the
/// row's own `corroborating_m` is applied during the cohort projection. The
/// cohort consists of the feature indices present in `baseline_evaluation`.
///
/// # Errors
///
/// Propagates errors from `evaluate_dsa_with_policy` and
/// `project_dsa_to_cohort`.
fn evaluate_selected_row_with_overrides(
    dataset: &PreparedDataset,
    nominal: &NominalModel,
    residuals: &ResidualSet,
    signs: &SignSet,
    baselines: &BaselineSet,
    grammar: &GrammarSet,
    baseline_selected_row: &CohortGridResult,
    baseline_evaluation: &DsaEvaluation,
    semantic_layer: &SemanticLayer,
    pre_failure_lookback_runs: usize,
    feature_policy_overrides: &[FeaturePolicyOverride],
) -> Result<DsaEvaluation> {
    let recall_rescue = RecallRescueConfig {
        enabled: true,
        ..RecallRescueConfig::default()
    };
    let semantic_rescue_support =
        build_semantic_rescue_support(semantic_layer, dataset.labels.len());
    let policy_runtime = DsaPolicyRuntime {
        feature_policy_overrides: feature_policy_overrides.to_vec(),
        recall_rescue,
        semantic_rescue_support,
    };

    let config = DsaConfig {
        window: baseline_selected_row.window,
        persistence_runs: baseline_selected_row.persistence_runs,
        alert_tau: baseline_selected_row.alert_tau,
        corroborating_feature_count_min: 1,
    };
    let base_evaluation = evaluate_dsa_with_policy(
        dataset,
        nominal,
        residuals,
        signs,
        baselines,
        grammar,
        &config,
        pre_failure_lookback_runs,
        &policy_runtime,
    )?;

    // Restrict the projection to the features the baseline cohort evaluated.
    let mut cohort_feature_indices = Vec::with_capacity(baseline_evaluation.traces.len());
    for trace in &baseline_evaluation.traces {
        cohort_feature_indices.push(trace.feature_index);
    }

    project_dsa_to_cohort(
        dataset,
        nominal,
        residuals,
        baselines,
        grammar,
        &base_evaluation,
        &cohort_feature_indices,
        baseline_selected_row.corroborating_m,
        pre_failure_lookback_runs,
        &baseline_selected_row.cohort_name,
    )
}
/// Lists failure run indices that `candidate` detects but `previous` did not.
///
/// A failure counts as detected when its signal has an `earliest_dsa_run`.
/// The result follows the order of `candidate.per_failure_run_signals`.
fn newly_recovered_failures(previous: &DsaEvaluation, candidate: &DsaEvaluation) -> Vec<usize> {
    let mut already_detected = BTreeSet::new();
    for row in &previous.per_failure_run_signals {
        if row.earliest_dsa_run.is_some() {
            already_detected.insert(row.failure_run_index);
        }
    }

    let mut recovered = Vec::new();
    for row in &candidate.per_failure_run_signals {
        if row.earliest_dsa_run.is_some() && !already_detected.contains(&row.failure_run_index) {
            recovered.push(row.failure_run_index);
        }
    }
    recovered
}
/// Builds one iteration-log row describing a single trial override.
///
/// Absolute metrics are taken from `candidate`; every `delta_*` field is
/// `candidate - previous`. `derived_from_failures` and `affected_failures`
/// both hold the same comma-joined list of `affected_failures` indices.
/// `delta_precursor_quality` is `None` unless both evaluations report a
/// precursor quality.
fn single_change_iteration_row(
    iteration: usize,
    change_kind: &str,
    change_target: &str,
    reason: String,
    affected_failures: &[usize],
    targets_nuisance_class: &str,
    accepted: bool,
    previous: &DsaEvaluation,
    candidate: &DsaEvaluation,
) -> SingleChangeIterationRow {
    let failure_list = affected_failures
        .iter()
        .map(ToString::to_string)
        .collect::<Vec<_>>()
        .join(",");

    let before = &previous.summary;
    let after = &candidate.summary;
    let episodes_before = &previous.episode_summary;
    let episodes_after = &candidate.episode_summary;

    let delta_precursor_quality = episodes_before
        .precursor_quality
        .zip(episodes_after.precursor_quality)
        .map(|(previous_value, candidate_value)| candidate_value - previous_value);

    SingleChangeIterationRow {
        iteration,
        change_kind: change_kind.into(),
        change_target: change_target.into(),
        reason,
        derived_from_failures: failure_list.clone(),
        targets_nuisance_class: targets_nuisance_class.into(),
        affected_failures: failure_list,
        accepted,
        recall: after.failure_run_recall,
        investigation_points: after.alert_point_count,
        episode_count: episodes_after.dsa_episode_count,
        precursor_quality: episodes_after.precursor_quality,
        pass_run_nuisance_proxy: after.pass_run_nuisance_proxy,
        delta_recall: after.failure_run_recall as i64 - before.failure_run_recall as i64,
        delta_investigation_points: after.alert_point_count as i64
            - before.alert_point_count as i64,
        delta_episode_count: episodes_after.dsa_episode_count as i64
            - episodes_before.dsa_episode_count as i64,
        delta_precursor_quality,
        delta_pass_run_nuisance_proxy: after.pass_run_nuisance_proxy
            - before.pass_run_nuisance_proxy,
    }
}
/// Proposes Watch-clamping overrides for unprotected features that drive
/// isolated pass-only alert episodes.
///
/// A feature qualifies when it has at least one isolated pass-only episode
/// (per `isolated_pass_episode_counts_by_feature`), raises at least one alert
/// on a pass run (label `-1`), and raises no alert on any failure run
/// (label `1`). Candidates are ranked by isolated episode count, then pass-run
/// alert count (both descending), then feature name, and truncated to
/// `MAX_FAILURE_DRIVEN_ISOLATED_NUISANCE_OVERRIDES`.
fn build_isolated_nuisance_overrides(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
    protected_features: &BTreeSet<usize>,
) -> Vec<FeaturePolicyOverride> {
    let isolated_episode_counts = isolated_pass_episode_counts_by_feature(dataset, evaluation);

    // Alert points per feature, split by whether the run failed or passed.
    let mut failure_alert_counts = BTreeMap::new();
    for trace in &evaluation.traces {
        let mut on_failure_runs = 0usize;
        for (run_index, &alerted) in trace.dsa_alert.iter().enumerate() {
            if dataset.labels[run_index] == 1 && alerted {
                on_failure_runs += 1;
            }
        }
        failure_alert_counts.insert(trace.feature_index, on_failure_runs);
    }
    let mut pass_alert_counts = BTreeMap::new();
    for trace in &evaluation.traces {
        let mut on_pass_runs = 0usize;
        for (run_index, &alerted) in trace.dsa_alert.iter().enumerate() {
            if dataset.labels[run_index] == -1 && alerted {
                on_pass_runs += 1;
            }
        }
        pass_alert_counts.insert(trace.feature_index, on_pass_runs);
    }

    let mut ranked = Vec::new();
    for trace in &evaluation.traces {
        if protected_features.contains(&trace.feature_index) {
            continue;
        }
        let isolated_episode_count = isolated_episode_counts
            .get(&trace.feature_index)
            .copied()
            .unwrap_or(0);
        let failure_alert_count = failure_alert_counts
            .get(&trace.feature_index)
            .copied()
            .unwrap_or(0);
        let pass_review_count = pass_alert_counts
            .get(&trace.feature_index)
            .copied()
            .unwrap_or(0);
        let qualifies =
            isolated_episode_count != 0 && failure_alert_count == 0 && pass_review_count != 0;
        if !qualifies {
            continue;
        }
        let override_entry = FeaturePolicyOverride {
            feature_index: trace.feature_index,
            feature_name: trace.feature_name.clone(),
            alert_class_override: Some(HeuristicAlertClass::Watch),
            requires_persistence_override: Some(true),
            requires_corroboration_override: Some(true),
            minimum_window_override: Some(3),
            minimum_hits_override: Some(2),
            maximum_allowed_fragmentation_override: Some(0.5),
            rescue_eligible: false,
            rescue_priority: 0,
            allow_watch_only: Some(true),
            allow_review_without_escalate: Some(false),
            suppress_if_isolated: Some(true),
            override_reason: format!(
                "Feature drives {} isolated pass-only episode(s) with {} pass-run alert points and no failure-local alert points; clamp to Watch and suppress if isolated.",
                isolated_episode_count, pass_review_count
            ),
        };
        ranked.push((isolated_episode_count, pass_review_count, override_entry));
    }

    ranked.sort_by(|left, right| {
        right
            .0
            .cmp(&left.0)
            .then_with(|| right.1.cmp(&left.1))
            .then_with(|| left.2.feature_name.cmp(&right.2.feature_name))
    });

    let mut selected = Vec::new();
    for (_, _, override_entry) in ranked
        .into_iter()
        .take(MAX_FAILURE_DRIVEN_ISOLATED_NUISANCE_OVERRIDES)
    {
        selected.push(override_entry);
    }
    selected
}
/// Counts, per feature, the alert episodes that contain no failure run and in
/// which that feature is the only one raising a DSA alert.
///
/// Episodes are the `(start, end)` inclusive ranges returned by
/// `episode_ranges` over the primary run alert signal. Episodes containing any
/// run labelled `1` are ignored.
fn isolated_pass_episode_counts_by_feature(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
) -> BTreeMap<usize, usize> {
    let run_count = dataset.labels.len();
    let mut alerted_features_by_run: Vec<Vec<usize>> = Vec::with_capacity(run_count);
    for run_index in 0..run_count {
        let mut alerted = Vec::new();
        for trace in &evaluation.traces {
            if trace.dsa_alert[run_index] {
                alerted.push(trace.feature_index);
            }
        }
        alerted_features_by_run.push(alerted);
    }

    let mut counts = BTreeMap::<usize, usize>::new();
    for (start, end) in episode_ranges(&evaluation.run_signals.primary_run_alert) {
        let touches_failure = (start..=end).any(|run_index| dataset.labels[run_index] == 1);
        if touches_failure {
            continue;
        }
        let mut distinct_features = BTreeSet::new();
        for run_index in start..=end {
            distinct_features.extend(alerted_features_by_run[run_index].iter().copied());
        }
        // Exactly one distinct alerting feature means the episode is isolated.
        let mut remaining = distinct_features.into_iter();
        if let (Some(feature_index), None) = (remaining.next(), remaining.next()) {
            *counts.entry(feature_index).or_default() += 1;
        }
    }
    counts
}
/// Produces one summary row per override, joining it with the feature's
/// benchmark metrics and its rank under each of the four ranking strategies.
///
/// Overrides whose feature index has no entry in `metrics.feature_metrics`
/// are omitted. A rank is `None` when the feature is absent from that ranking.
fn build_feature_policy_summary(
    metrics: &BenchmarkMetrics,
    baseline_ranking: &[FeatureRankingRow],
    recall_aware_ranking: &[FeatureRankingRow],
    burden_aware_ranking: &[FeatureRankingRow],
    dsfb_aware_ranking: &[FeatureRankingRow],
    overrides: &[FeaturePolicyOverride],
) -> Vec<FeaturePolicySummaryRow> {
    // Feature index -> rank for a single ranking.
    let rank_by_feature = |ranking: &[FeatureRankingRow]| {
        ranking
            .iter()
            .map(|row| (row.feature_index, row.rank))
            .collect::<BTreeMap<_, _>>()
    };

    let mut metrics_by_feature = BTreeMap::new();
    for feature in &metrics.feature_metrics {
        metrics_by_feature.insert(feature.feature_index, feature);
    }
    let compression_ranks = rank_by_feature(baseline_ranking);
    let recall_ranks = rank_by_feature(recall_aware_ranking);
    let burden_ranks = rank_by_feature(burden_aware_ranking);
    let dsfb_ranks = rank_by_feature(dsfb_aware_ranking);

    let mut rows = Vec::new();
    for override_entry in overrides {
        let feature_index = override_entry.feature_index;
        let Some(feature_metric) = metrics_by_feature.get(&feature_index) else {
            continue;
        };
        rows.push(FeaturePolicySummaryRow {
            feature_index,
            feature_name: override_entry.feature_name.clone(),
            compression_rank: compression_ranks.get(&feature_index).copied(),
            recall_aware_rank: recall_ranks.get(&feature_index).copied(),
            burden_aware_rank: burden_ranks.get(&feature_index).copied(),
            dsfb_aware_rank: dsfb_ranks.get(&feature_index).copied(),
            pre_failure_run_hits: feature_metric.pre_failure_run_hits,
            motif_precision_proxy: feature_metric.motif_precision_proxy,
            missing_fraction: feature_metric.missing_fraction,
            rescue_eligible: override_entry.rescue_eligible,
            rescue_priority: override_entry.rescue_priority,
            alert_class_override: override_entry.alert_class_override,
            requires_persistence_override: override_entry.requires_persistence_override,
            requires_corroboration_override: override_entry.requires_corroboration_override,
            minimum_window_override: override_entry.minimum_window_override,
            minimum_hits_override: override_entry.minimum_hits_override,
            maximum_allowed_fragmentation_override: override_entry
                .maximum_allowed_fragmentation_override,
            override_reason: override_entry.override_reason.clone(),
            allow_watch_only: override_entry.allow_watch_only,
            allow_review_without_escalate: override_entry.allow_review_without_escalate,
            suppress_if_isolated: override_entry.suppress_if_isolated,
        });
    }
    rows
}
/// Counts, per feature, how many missed failures had that feature as their
/// highest-scoring DSA feature.
///
/// A failure is missed when its signal has no `earliest_dsa_run`. Signals
/// without a `max_dsa_score_feature_index` contribute nothing. Counts are
/// returned as `f64`.
fn recall_rescue_contribution_by_feature(
    baseline_evaluation: &DsaEvaluation,
) -> BTreeMap<usize, f64> {
    let missed_top_features = baseline_evaluation
        .per_failure_run_signals
        .iter()
        .filter(|signal| signal.earliest_dsa_run.is_none())
        .filter_map(|signal| signal.max_dsa_score_feature_index);

    let mut contributions = BTreeMap::<usize, f64>::new();
    for feature_index in missed_top_features {
        *contributions.entry(feature_index).or_insert(0.0) += 1.0;
    }
    contributions
}
/// Builds one diagnostic row per failure run that the baseline evaluation missed
/// (`earliest_dsa_run` is `None`).
///
/// Each row describes the feature recorded as the run's maximum-DSA-score feature:
/// its rank in both rankings, the scoring components copied from the baseline
/// signal, any matching policy override (looked up by feature index), a label
/// naming the rule held responsible for the miss, and whether the optimized
/// evaluation produced a DSA run for the same failure.
///
/// `bounded_feature_override_would_recover` and `recovered_after_optimization`
/// are populated from the same value.
fn build_recall_critical_features(
    baseline: &DsaEvaluation,
    optimized: &DsaEvaluation,
    baseline_ranking: &[FeatureRankingRow],
    recall_aware_ranking: &[FeatureRankingRow],
    feature_policy_overrides: &[FeaturePolicyOverride],
    recall_rescue_contributions: &BTreeMap<usize, f64>,
) -> Vec<RecallCriticalFeatureRow> {
    // Shared index builder: feature index -> rank within one ranking.
    let rank_lookup = |ranking: &[FeatureRankingRow]| {
        ranking
            .iter()
            .map(|entry| (entry.feature_index, entry.rank))
            .collect::<BTreeMap<_, _>>()
    };
    let compression_ranks = rank_lookup(baseline_ranking);
    let recall_ranks = rank_lookup(recall_aware_ranking);

    let optimized_signals = optimized
        .per_failure_run_signals
        .iter()
        .map(|signal| (signal.failure_run_index, signal))
        .collect::<BTreeMap<_, _>>();
    let overrides = feature_policy_overrides
        .iter()
        .map(|policy_override| (policy_override.feature_index, policy_override))
        .collect::<BTreeMap<_, _>>();

    baseline
        .per_failure_run_signals
        .iter()
        .filter(|missed| missed.earliest_dsa_run.is_none())
        .map(|missed| {
            let feature_index = missed.max_dsa_score_feature_index;
            let policy_override =
                feature_index.and_then(|index| overrides.get(&index).copied());
            let optimized_signal = optimized_signals
                .get(&missed.failure_run_index)
                .copied();
            let recovered =
                optimized_signal.is_some_and(|signal| signal.earliest_dsa_run.is_some());

            // Classify the miss; the first matching rule wins.
            let best_score = missed.max_dsa_score_in_lookback;
            let inconsistent = missed.max_dsa_score_consistent == Some(false);
            let watch_or_review = missed
                .max_dsa_score_resolved_alert_class
                .as_deref()
                .is_some_and(|class| class == "Watch" || class == "Review");
            let miss_rule = if inconsistent && watch_or_review {
                "directional_consistency_gate"
            } else if missed.max_dsa_score_numeric_dsa_alert == Some(false)
                && best_score.is_some()
            {
                "watch_class_near_miss_below_numeric_gate"
            } else if best_score.unwrap_or(0.0) < 2.0 {
                "numeric_score_below_tau"
            } else {
                "policy_state_never_reached_review"
            };

            RecallCriticalFeatureRow {
                failure_run_index: missed.failure_run_index,
                feature_index,
                feature_name: missed.max_dsa_score_feature_name.clone(),
                compression_rank: feature_index
                    .and_then(|index| compression_ranks.get(&index).copied()),
                recall_aware_rank: feature_index
                    .and_then(|index| recall_ranks.get(&index).copied()),
                max_structural_score: best_score,
                resolved_alert_class: missed.max_dsa_score_resolved_alert_class.clone(),
                policy_state: missed.max_dsa_score_policy_state.clone(),
                boundary_density_w: missed.max_dsa_score_boundary_density_w,
                ewma_occupancy_w: missed.max_dsa_score_ewma_occupancy_w,
                motif_recurrence_w: missed.max_dsa_score_motif_recurrence_w,
                fragmentation_proxy_w: missed.max_dsa_score_fragmentation_proxy_w,
                consistent: missed.max_dsa_score_consistent,
                exact_miss_rule: miss_rule.into(),
                feature_override_exists: policy_override.is_some(),
                rescue_priority: policy_override.map(|entry| entry.rescue_priority),
                allow_review_without_escalate: policy_override
                    .and_then(|entry| entry.allow_review_without_escalate),
                bounded_feature_override_would_recover: recovered,
                recovered_after_optimization: recovered,
                optimized_feature_name: optimized_signal
                    .and_then(|signal| signal.earliest_dsa_feature_name.clone()),
                recall_rescue_contribution: feature_index
                    .and_then(|index| recall_rescue_contributions.get(&index).copied())
                    .unwrap_or(0.0),
            }
        })
        .collect()
}
/// Returns the non-dominated grid rows, ordered by `compare_stage_b_rows`.
///
/// The candidate pool is every row with `failure_recall >= 100`; when no row
/// reaches that floor, all rows are considered instead. A candidate is dropped
/// when some other row (different `grid_row_id`) is at least as good on both
/// relative nuisance delta and recall, and strictly better on at least one.
/// Both deltas in a comparison are measured against the candidate's own
/// `ewma_nuisance`.
fn pareto_frontier(rows: &[CohortGridResult]) -> Vec<CohortGridResult> {
    const RECALL_FLOOR: usize = 100;

    let mut pool: Vec<&CohortGridResult> = rows
        .iter()
        .filter(|candidate| candidate.failure_recall >= RECALL_FLOOR)
        .collect();
    if pool.is_empty() {
        pool.extend(rows.iter());
    }

    let is_dominated = |candidate: &CohortGridResult| {
        let own_delta = delta_nuisance_relative(
            candidate.ewma_nuisance,
            candidate.pass_run_nuisance_proxy,
        );
        pool.iter().any(|rival| {
            rival.grid_row_id != candidate.grid_row_id && {
                let rival_delta = delta_nuisance_relative(
                    candidate.ewma_nuisance,
                    rival.pass_run_nuisance_proxy,
                );
                let no_worse = rival_delta >= own_delta
                    && rival.failure_recall >= candidate.failure_recall;
                let strictly_better = rival_delta > own_delta
                    || rival.failure_recall > candidate.failure_recall;
                no_worse && strictly_better
            }
        })
    };

    let mut frontier: Vec<CohortGridResult> = pool
        .iter()
        .copied()
        .filter(|&candidate| !is_dominated(candidate))
        .cloned()
        .collect();
    frontier.sort_by(|a, b| compare_stage_b_rows(a, b, a.ewma_nuisance));
    frontier
}
/// Selects the rows whose `pass_run_nuisance_proxy` is strictly below
/// `raw_boundary_nuisance` and whose `failure_recall` is at least 100, and
/// returns clones of them ordered by `compare_stage_a_rows`.
fn stage_a_candidates(
    rows: &[CohortGridResult],
    raw_boundary_nuisance: f64,
    current_policy_dsa_nuisance: f64,
) -> Vec<CohortGridResult> {
    let mut shortlisted: Vec<CohortGridResult> = Vec::new();
    for row in rows {
        let beats_raw_boundary = row.pass_run_nuisance_proxy < raw_boundary_nuisance;
        if beats_raw_boundary && row.failure_recall >= 100 {
            shortlisted.push(row.clone());
        }
    }
    shortlisted.sort_by(|a, b| compare_stage_a_rows(a, b, current_policy_dsa_nuisance));
    shortlisted
}
/// Returns a sorted copy of every row: rows whose `pass_run_nuisance_proxy` is
/// strictly below `ewma_nuisance` come first, and ties within each group are
/// broken by `compare_stage_b_rows`.
fn stage_b_candidates(
    rows: &[CohortGridResult],
    ewma_nuisance: f64,
    current_policy_dsa_nuisance: f64,
) -> Vec<CohortGridResult> {
    let beats_ewma = |row: &CohortGridResult| row.pass_run_nuisance_proxy < ewma_nuisance;

    let mut ordered = rows.to_vec();
    ordered.sort_by(|a, b| {
        // Comparing `b` against `a` puts `true` (beats EWMA) ahead of `false`.
        beats_ewma(b)
            .cmp(&beats_ewma(a))
            .then_with(|| compare_stage_b_rows(a, b, current_policy_dsa_nuisance))
    });
    ordered
}
fn stage1_candidates(
rows: &[CohortGridResult],
operator_baselines: &OperatorBaselines,
) -> Vec<CohortGridResult> {
let mut candidates = rows
.iter()
.filter(|row| row.failure_recall >= 100)
.cloned()
.collect::<Vec<_>>();
candidates.sort_by(|left, right| {
compare_operator_rows(left, right, operator_baselines).then_with(|| {
compare_stage_a_rows(
left,
right,
operator_baselines
.current_policy_dsa
.pass_run_nuisance_proxy,
)
})
});
candidates
}
/// Returns a copy of every row, ordered by `compare_operator_rows`; no row is
/// filtered out.
fn stage2_candidates(
    rows: &[CohortGridResult],
    operator_baselines: &OperatorBaselines,
) -> Vec<CohortGridResult> {
    let mut ordered = Vec::with_capacity(rows.len());
    ordered.extend_from_slice(rows);
    ordered.sort_by(|a, b| compare_operator_rows(a, b, operator_baselines));
    ordered
}
/// Orders two grid rows for operator-burden ranking.
///
/// A row sorts earlier (`Ordering::Less`) when, in priority order, it has:
/// 1. higher `failure_recall`;
/// 2. larger relative delta in investigation points versus the baseline;
/// 3. larger relative delta in episode count versus the baseline;
/// 4. the preferred `precursor_quality` per `compare_option_f64(right, left)`;
/// 5. larger relative delta in review/escalate points per pass run;
/// 6. larger relative delta in review/escalate episodes per pass run;
/// 7. larger relative nuisance delta versus the row's own `ewma_nuisance`;
/// 8. the preferred `mean_lead_time_runs` per `compare_option_f64(right, left)`;
/// 9. the lexicographically smaller `cohort_name`.
///
/// Float comparisons that cannot be ordered are treated as equal.
fn compare_operator_rows(
    left: &CohortGridResult,
    right: &CohortGridResult,
    baselines: &OperatorBaselines,
) -> Ordering {
    /// Larger values sort first; unordered pairs count as equal.
    fn descending<T: PartialOrd>(left: T, right: T) -> Ordering {
        right.partial_cmp(&left).unwrap_or(Ordering::Equal)
    }

    let investigation_delta = |row: &CohortGridResult| {
        delta_relative_count(
            baselines.baseline_investigation_points,
            row.investigation_point_count,
        )
    };
    let episode_delta = |row: &CohortGridResult| {
        delta_relative_count(baselines.baseline_episode_count, row.dsa_episode_count)
    };
    let review_points_delta = |row: &CohortGridResult| {
        delta_relative_f64(
            baselines.baseline_review_escalate_points_per_pass_run,
            row.review_escalate_points_per_pass_run,
        )
    };
    let review_episodes_delta = |row: &CohortGridResult| {
        delta_relative_f64(
            baselines.baseline_review_escalate_episodes_per_pass_run,
            row.review_escalate_episodes_per_pass_run,
        )
    };
    let nuisance_delta = |row: &CohortGridResult| {
        delta_nuisance_relative(row.ewma_nuisance, row.pass_run_nuisance_proxy)
    };

    right
        .failure_recall
        .cmp(&left.failure_recall)
        .then_with(|| descending(investigation_delta(left), investigation_delta(right)))
        .then_with(|| descending(episode_delta(left), episode_delta(right)))
        .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
        .then_with(|| descending(review_points_delta(left), review_points_delta(right)))
        .then_with(|| descending(review_episodes_delta(left), review_episodes_delta(right)))
        .then_with(|| descending(nuisance_delta(left), nuisance_delta(right)))
        .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
        .then_with(|| left.cohort_name.cmp(&right.cohort_name))
}
/// Picks the grid row to report as the optimized configuration.
///
/// The first entry of `stage_b_candidates` is returned when that slice is
/// non-empty. Otherwise the minimum of `all_rows` is chosen under this ordering:
/// 1. smaller `primary_success_gap`;
/// 2. rows whose `pass_run_nuisance_proxy` is below `ewma_nuisance` first;
/// 3. smaller shortfall of `failure_recall` against `threshold_recall`
///    (saturating at zero);
/// 4. `compare_stage_b_rows`.
///
/// Returns `None` only when both slices are empty. On ties the earliest row in
/// `all_rows` wins. Rows are compared by reference; only the winner is cloned.
fn choose_optimized_row(
    stage_b_candidates: &[CohortGridResult],
    all_rows: &[CohortGridResult],
    ewma_nuisance: f64,
    threshold_recall: usize,
    current_policy_dsa_nuisance: f64,
) -> Option<CohortGridResult> {
    if let Some(best) = stage_b_candidates.first() {
        return Some(best.clone());
    }

    let beats_ewma = |row: &CohortGridResult| row.pass_run_nuisance_proxy < ewma_nuisance;
    let recall_gap =
        |row: &CohortGridResult| threshold_recall.saturating_sub(row.failure_recall);

    all_rows
        .iter()
        .min_by(|&left, &right| {
            primary_success_gap(left)
                .partial_cmp(&primary_success_gap(right))
                .unwrap_or(Ordering::Equal)
                // Swapped operands: `true` (beats EWMA) ranks ahead of `false`.
                .then_with(|| beats_ewma(right).cmp(&beats_ewma(left)))
                .then_with(|| recall_gap(left).cmp(&recall_gap(right)))
                .then_with(|| compare_stage_b_rows(left, right, current_policy_dsa_nuisance))
        })
        .cloned()
}
/// Stage A ordering: nuisance reduction first, recall second.
///
/// A row sorts earlier when it has, in priority order: a larger relative
/// nuisance delta versus its own `ewma_nuisance`; a larger relative nuisance
/// delta versus `current_policy_dsa_nuisance`; higher `failure_recall`; then the
/// preferred `precursor_quality`, `mean_lead_time_runs` and `compression_ratio`
/// as decided by `compare_option_f64(right, left)`. Unordered float
/// comparisons count as equal.
fn compare_stage_a_rows(
    left: &CohortGridResult,
    right: &CohortGridResult,
    current_policy_dsa_nuisance: f64,
) -> Ordering {
    let gain_vs_ewma = |row: &CohortGridResult| {
        delta_nuisance_relative(row.ewma_nuisance, row.pass_run_nuisance_proxy)
    };
    let gain_vs_current = |row: &CohortGridResult| {
        delta_nuisance_relative(current_policy_dsa_nuisance, row.pass_run_nuisance_proxy)
    };

    let by_ewma_gain = gain_vs_ewma(right)
        .partial_cmp(&gain_vs_ewma(left))
        .unwrap_or(Ordering::Equal);

    by_ewma_gain
        .then_with(|| {
            gain_vs_current(right)
                .partial_cmp(&gain_vs_current(left))
                .unwrap_or(Ordering::Equal)
        })
        .then_with(|| right.failure_recall.cmp(&left.failure_recall))
        .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
        .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
        .then_with(|| compare_option_f64(right.compression_ratio, left.compression_ratio))
}
/// Stage B ordering: recall first, nuisance reduction second.
///
/// A row sorts earlier when it has, in priority order: higher `failure_recall`;
/// a larger relative nuisance delta versus its own `ewma_nuisance`; the
/// preferred `precursor_quality`, `mean_lead_time_runs` and `compression_ratio`
/// as decided by `compare_option_f64(right, left)`; and finally a larger
/// relative nuisance delta versus `current_policy_dsa_nuisance`. Unordered
/// float comparisons count as equal.
fn compare_stage_b_rows(
    left: &CohortGridResult,
    right: &CohortGridResult,
    current_policy_dsa_nuisance: f64,
) -> Ordering {
    let gain_vs_ewma = |row: &CohortGridResult| {
        delta_nuisance_relative(row.ewma_nuisance, row.pass_run_nuisance_proxy)
    };
    let gain_vs_current = |row: &CohortGridResult| {
        delta_nuisance_relative(current_policy_dsa_nuisance, row.pass_run_nuisance_proxy)
    };

    let by_recall = right.failure_recall.cmp(&left.failure_recall);

    by_recall
        .then_with(|| {
            gain_vs_ewma(right)
                .partial_cmp(&gain_vs_ewma(left))
                .unwrap_or(Ordering::Equal)
        })
        .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
        .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
        .then_with(|| compare_option_f64(right.compression_ratio, left.compression_ratio))
        .then_with(|| {
            gain_vs_current(right)
                .partial_cmp(&gain_vs_current(left))
                .unwrap_or(Ordering::Equal)
        })
}
/// Re-runs the DSA evaluation for one selected grid row under `policy_runtime`
/// and projects the result onto the row's cohort.
///
/// The full-feature evaluation uses the row's `window`, `persistence_runs` and
/// `alert_tau` with `corroborating_feature_count_min` fixed at 1; the row's
/// `corroborating_m` is passed to the cohort projection instead. The cohort set
/// is chosen from `row.ranking_strategy` ("recall_aware", "burden_aware",
/// "dsfb_aware"); any other value falls back to `baseline_cohorts`.
///
/// # Errors
/// Propagates any error from `evaluate_dsa_with_policy` or
/// `project_dsa_to_cohort`.
fn rebuild_selected_evaluation_with_policy(
    dataset: &PreparedDataset,
    nominal: &NominalModel,
    residuals: &ResidualSet,
    signs: &SignSet,
    baselines: &BaselineSet,
    grammar: &GrammarSet,
    baseline_cohorts: &FeatureCohorts,
    recall_aware_cohorts: &FeatureCohorts,
    burden_aware_cohorts: &FeatureCohorts,
    dsfb_aware_cohorts: &FeatureCohorts,
    pre_failure_lookback_runs: usize,
    row: &CohortGridResult,
    policy_runtime: &DsaPolicyRuntime,
) -> Result<DsaEvaluation> {
    let strategy_cohorts = match row.ranking_strategy.as_str() {
        "recall_aware" => recall_aware_cohorts,
        "burden_aware" => burden_aware_cohorts,
        "dsfb_aware" => dsfb_aware_cohorts,
        _ => baseline_cohorts,
    };

    // Evaluate over all features first; the cohort restriction is applied below.
    let full_evaluation = evaluate_dsa_with_policy(
        dataset,
        nominal,
        residuals,
        signs,
        baselines,
        grammar,
        &DsaConfig {
            window: row.window,
            persistence_runs: row.persistence_runs,
            alert_tau: row.alert_tau,
            corroborating_feature_count_min: 1,
        },
        pre_failure_lookback_runs,
        policy_runtime,
    )?;

    let members = cohort_members(strategy_cohorts, &row.cohort_name);
    let member_feature_indices: Vec<_> = members
        .iter()
        .map(|member| member.feature_index)
        .collect();

    project_dsa_to_cohort(
        dataset,
        nominal,
        residuals,
        baselines,
        grammar,
        &full_evaluation,
        &member_feature_indices,
        row.corroborating_m,
        pre_failure_lookback_runs,
        &row.cohort_name,
    )
}
/// Builds one diagnostic row per failure run that the baseline evaluation missed
/// (`earliest_dsa_run` is `None`), flagging which gates may explain the miss.
///
/// Policy overrides are matched by feature *name* against the run's
/// maximum-DSA-score feature. The `exact_miss_rule` label is the first matching
/// rule in this order: fragmentation ceiling, directional consistency gate,
/// persistence gate, score below 2.0, otherwise "policy state never reached
/// review". `ranking_exclusion`, `cohort_selection` and
/// `corroboration_threshold` are always `false`, and
/// `bounded_rescue_would_recover` mirrors `recovered_after_optimization`.
fn build_missed_failure_diagnostics(
    baseline: &DsaEvaluation,
    optimized: &DsaEvaluation,
    feature_policy_overrides: &[FeaturePolicyOverride],
) -> Vec<MissedFailureDiagnosticRow> {
    let optimized_signals = optimized
        .per_failure_run_signals
        .iter()
        .map(|signal| (signal.failure_run_index, signal))
        .collect::<BTreeMap<_, _>>();
    let overrides_by_name = feature_policy_overrides
        .iter()
        .map(|policy_override| (policy_override.feature_name.as_str(), policy_override))
        .collect::<BTreeMap<_, _>>();

    baseline
        .per_failure_run_signals
        .iter()
        .filter(|missed| missed.earliest_dsa_run.is_none())
        .map(|missed| {
            let optimized_signal = optimized_signals
                .get(&missed.failure_run_index)
                .copied();
            let recovered =
                optimized_signal.is_some_and(|signal| signal.earliest_dsa_run.is_some());

            let policy_override = missed
                .max_dsa_score_feature_name
                .as_deref()
                .and_then(|feature_name| overrides_by_name.get(feature_name))
                .copied();

            let watch_or_review = missed
                .max_dsa_score_resolved_alert_class
                .as_deref()
                .is_some_and(|class| class == "Watch" || class == "Review");
            let silent = missed
                .max_dsa_score_policy_state
                .as_deref()
                .is_some_and(|state| state == "silent");

            // Only features with an override can trip the fragmentation ceiling.
            let fragmentation_ceiling = match policy_override {
                Some(entry) => {
                    missed.max_dsa_score_fragmentation_proxy_w.unwrap_or(0.0)
                        > entry
                            .maximum_allowed_fragmentation_override
                            .unwrap_or(OPTIMIZATION_RESCUE_FRAGMENTATION)
                }
                None => false,
            };
            let directional_consistency_gate =
                missed.max_dsa_score_consistent == Some(false) && watch_or_review;
            let policy_suppression = missed.max_dsa_score_policy_suppressed.unwrap_or(false)
                || (silent && watch_or_review);
            let persistence_gate = silent
                && missed.max_dsa_score_numeric_dsa_alert == Some(false)
                && missed.max_dsa_score_in_lookback.is_some();

            let miss_rule = if fragmentation_ceiling {
                "feature_override_fragmentation_ceiling"
            } else if directional_consistency_gate {
                "directional_consistency_gate"
            } else if persistence_gate {
                "watch_class_near_miss_below_numeric_gate"
            } else if missed.max_dsa_score_in_lookback.unwrap_or(0.0) < 2.0 {
                "numeric_score_below_tau"
            } else {
                "policy_state_never_reached_review"
            };

            MissedFailureDiagnosticRow {
                failure_run_index: missed.failure_run_index,
                nearest_feature_name: missed.max_dsa_score_feature_name.clone(),
                nearest_feature_score: missed.max_dsa_score_in_lookback,
                nearest_feature_policy_state: missed.max_dsa_score_policy_state.clone(),
                nearest_feature_resolved_alert_class: missed
                    .max_dsa_score_resolved_alert_class
                    .clone(),
                nearest_feature_boundary_density_w: missed.max_dsa_score_boundary_density_w,
                nearest_feature_ewma_occupancy_w: missed.max_dsa_score_ewma_occupancy_w,
                nearest_feature_motif_recurrence_w: missed.max_dsa_score_motif_recurrence_w,
                nearest_feature_fragmentation_proxy_w: missed
                    .max_dsa_score_fragmentation_proxy_w,
                nearest_feature_consistent: missed.max_dsa_score_consistent,
                ranking_exclusion: false,
                cohort_selection: false,
                policy_suppression,
                fragmentation_ceiling,
                directional_consistency_gate,
                persistence_gate,
                corroboration_threshold: false,
                // An override existed for the feature, yet the run stayed missed.
                rescue_gate_not_activating: policy_override.is_some() && !recovered,
                exact_miss_rule: miss_rule.into(),
                bounded_rescue_would_recover: recovered,
                recovered_after_optimization: recovered,
                optimized_feature_name: optimized_signal
                    .and_then(|signal| signal.earliest_dsa_feature_name.clone()),
            }
        })
        .collect()
}
/// Summarises what the optimized policy contributed, as a flat list of rows.
///
/// Rows are emitted in this order:
/// 1. for each motif contribution of the optimized evaluation, its silent
///    suppression points followed by its pre-failure review/escalate points;
/// 2. for each feature name recorded as `earliest_dsa_feature_name` on a
///    failure run the baseline missed, the number of such runs;
/// 3. for each rescue-transition label other than "none" found in the optimized
///    traces, its occurrence count.
///
/// Groups 2 and 3 are ordered by name. Every row carries "best_success" or
/// "best_near_success" depending on `selected_row.primary_success`.
fn build_policy_contribution_analysis(
    baseline: &DsaEvaluation,
    optimized: &DsaEvaluation,
    selected_row: &CohortGridResult,
) -> Vec<PolicyContributionAnalysisRow> {
    let role_label = if selected_row.primary_success {
        "best_success"
    } else {
        "best_near_success"
    };
    let analysis_row = |contribution_type: &'static str,
                        name: String,
                        value: f64,
                        note: &'static str| PolicyContributionAnalysisRow {
        configuration_role: role_label.into(),
        contribution_type: contribution_type.into(),
        name,
        value,
        note: note.into(),
    };

    let optimized_signals = optimized
        .per_failure_run_signals
        .iter()
        .map(|signal| (signal.failure_run_index, signal))
        .collect::<BTreeMap<_, _>>();

    let mut analysis = Vec::new();

    for contribution in &optimized.motif_policy_contributions {
        analysis.push(analysis_row(
            "motif_nuisance_suppression",
            contribution.motif_name.clone(),
            contribution.silent_suppression_points as f64,
            "silent_suppression_points",
        ));
        analysis.push(analysis_row(
            "motif_pre_failure_review_or_escalate",
            contribution.motif_name.clone(),
            contribution.pre_failure_review_or_escalate_points as f64,
            "pre_failure_review_or_escalate_points",
        ));
    }

    // Count, per feature name, the baseline-missed runs the optimized run caught.
    let mut rescued_by_feature = BTreeMap::<String, usize>::new();
    for missed in baseline
        .per_failure_run_signals
        .iter()
        .filter(|signal| signal.earliest_dsa_run.is_none())
    {
        let rescuing_feature = optimized_signals
            .get(&missed.failure_run_index)
            .copied()
            .and_then(|signal| signal.earliest_dsa_feature_name.as_ref());
        if let Some(feature_name) = rescuing_feature {
            *rescued_by_feature.entry(feature_name.clone()).or_default() += 1;
        }
    }
    for (feature_name, rescued) in rescued_by_feature {
        analysis.push(analysis_row(
            "rescued_failure_feature",
            feature_name,
            rescued as f64,
            "recovered baseline-missed failures",
        ));
    }

    let mut transition_tally = BTreeMap::<String, usize>::new();
    for trace in &optimized.traces {
        for transition in &trace.rescue_transition {
            if transition != "none" {
                *transition_tally.entry(transition.clone()).or_default() += 1;
            }
        }
    }
    for (transition, occurrences) in transition_tally {
        analysis.push(analysis_row(
            "rescue_transition",
            transition,
            occurrences as f64,
            "rescued feature points",
        ));
    }

    analysis
}
/// Assesses the selected configuration against the predeclared nuisance-delta
/// targets and records the best alternatives found in the sweep.
///
/// * The primary target compares against the EWMA nuisance rate from
///   `metrics`; the secondary target compares against the nuisance proxy of
///   `current_policy_baseline_row`.
/// * `best_recall_103_candidate` / `best_recall_104_candidate` are the rows of
///   `all_rows` with recall of at least 103 / 104 that rank first under
///   `compare_stage_a_rows`.
/// * `best_secondary_target_candidate` is the row with recall of at least 100
///   that has the largest relative nuisance delta versus the current-policy
///   DSA nuisance (the last such row on ties).
/// * `best_reachable_pareto_point` falls back from the recall-103 candidate to
///   the first Stage A candidate, and finally to the selected configuration.
///
/// Candidate rows are compared by reference; only their summaries are built.
fn compute_delta_target_assessment(
    selected_row: &CohortGridResult,
    stage_a_candidates: &[CohortGridResult],
    all_rows: &[CohortGridResult],
    current_policy_baseline_row: &CohortGridResult,
    metrics: &BenchmarkMetrics,
) -> DeltaTargetAssessment {
    let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
    let current_policy_dsa_nuisance = current_policy_baseline_row.pass_run_nuisance_proxy;
    let primary_target_nuisance_ceiling = ewma_nuisance * (1.0 - PRIMARY_DELTA_TARGET);
    let secondary_target_nuisance_ceiling =
        current_policy_dsa_nuisance * (1.0 - SECONDARY_DELTA_TARGET);

    let summarize = |row: &CohortGridResult| {
        delta_candidate_summary(row, ewma_nuisance, current_policy_dsa_nuisance)
    };
    // Best Stage-A-ordered row among those meeting a recall floor.
    let best_with_recall_floor = |recall_floor: usize| {
        all_rows
            .iter()
            .filter(|row| row.failure_recall >= recall_floor)
            .min_by(|&left, &right| {
                compare_stage_a_rows(left, right, current_policy_dsa_nuisance)
            })
            .map(&summarize)
    };
    let gain_vs_current_dsa = |row: &CohortGridResult| {
        delta_nuisance_relative(current_policy_dsa_nuisance, row.pass_run_nuisance_proxy)
    };

    let selected_configuration = summarize(selected_row);
    let best_recall_103_candidate = best_with_recall_floor(103);
    let best_recall_104_candidate = best_with_recall_floor(104);
    let best_secondary_target_candidate = all_rows
        .iter()
        .filter(|row| row.failure_recall >= 100)
        .max_by(|&left, &right| {
            gain_vs_current_dsa(left)
                .partial_cmp(&gain_vs_current_dsa(right))
                .unwrap_or(Ordering::Equal)
        })
        .map(&summarize);
    let best_stage_a_delta_candidate = stage_a_candidates.first().map(&summarize);
    let best_reachable_pareto_point = best_recall_103_candidate
        .clone()
        .or_else(|| best_stage_a_delta_candidate.clone())
        .unwrap_or_else(|| selected_configuration.clone());

    let reaches_primary_delta =
        selected_configuration.delta_nuisance_vs_ewma >= PRIMARY_DELTA_TARGET;
    let primary_target_met = reaches_primary_delta && selected_configuration.failure_recall >= 103;
    let ideal_target_met = reaches_primary_delta && selected_configuration.failure_recall >= 104;
    let secondary_target_met = selected_configuration.delta_nuisance_vs_current_dsa
        >= SECONDARY_DELTA_TARGET
        && selected_configuration.failure_recall >= 100;

    let mean_lead_time_ge_ewma = paired_ge(
        selected_row.mean_lead_time_runs,
        metrics.lead_time_summary.mean_ewma_lead_runs,
    );
    let mean_lead_time_ge_threshold = paired_ge(
        selected_row.mean_lead_time_runs,
        metrics.lead_time_summary.mean_threshold_lead_runs,
    );

    let assessment_note = if primary_target_met {
        format!(
            "Primary 40% nuisance-reduction target reached on {} with delta_nuisance_vs_ewma {:.4} and recall {}/{}.",
            selected_configuration.configuration,
            selected_configuration.delta_nuisance_vs_ewma,
            selected_configuration.failure_recall,
            selected_configuration.failure_runs,
        )
    } else if let Some(best_recall_103_candidate) = &best_recall_103_candidate {
        format!(
            "Primary 40% nuisance-reduction target was not reachable in the saved deterministic sweep. The best row retaining recall >= 103/104 was {} with nuisance {:.4}, delta_nuisance_vs_ewma {:.4}, and delta_nuisance_vs_current_dsa {:.4}. Reaching the primary target would require nuisance <= {:.4}; no recall >= 103 row achieved that ceiling.",
            best_recall_103_candidate.configuration,
            best_recall_103_candidate.pass_run_nuisance_proxy,
            best_recall_103_candidate.delta_nuisance_vs_ewma,
            best_recall_103_candidate.delta_nuisance_vs_current_dsa,
            primary_target_nuisance_ceiling,
        )
    } else if let Some(best_secondary_target_candidate) = &best_secondary_target_candidate {
        format!(
            "No recall-preserving row reached the primary 40% delta target. The best row with recall >= 100/104 was {} with delta_nuisance_vs_ewma {:.4} and delta_nuisance_vs_current_dsa {:.4}; the secondary 40% target would require nuisance <= {:.4}.",
            best_secondary_target_candidate.configuration,
            best_secondary_target_candidate.delta_nuisance_vs_ewma,
            best_secondary_target_candidate.delta_nuisance_vs_current_dsa,
            secondary_target_nuisance_ceiling,
        )
    } else {
        String::from(
            "No saved row satisfied even the Stage A recall floor, so the 40% target is unachievable under the current deterministic search.",
        )
    };

    DeltaTargetAssessment {
        primary_target_definition: predeclared_primary_target(),
        secondary_target_definition: predeclared_secondary_target(),
        ewma_nuisance_baseline: ewma_nuisance,
        current_policy_dsa_nuisance_baseline: current_policy_dsa_nuisance,
        primary_delta_target: PRIMARY_DELTA_TARGET,
        secondary_delta_target: SECONDARY_DELTA_TARGET,
        primary_target_nuisance_ceiling,
        secondary_target_nuisance_ceiling,
        selected_configuration,
        primary_target_met,
        ideal_target_met,
        secondary_target_met,
        mean_lead_time_ge_ewma,
        mean_lead_time_ge_threshold,
        best_recall_103_candidate,
        best_recall_104_candidate,
        best_secondary_target_candidate,
        best_stage_a_delta_candidate,
        best_reachable_pareto_point,
        assessment_note,
    }
}
/// Assembles the three operator baseline layers from the baseline evaluation
/// and selects which layer anchors each operator-burden comparison.
///
/// * `numeric_only_dsa` supplies the investigation-point baseline.
/// * `raw_boundary` supplies the episode-count baseline. Its
///   `investigation_points` and `episode_count` are both taken from
///   `summary.raw_boundary_episode_count`, its `recall` is fixed at 0 and its
///   `precursor_quality` is `None`.
/// * `current_policy_dsa` supplies the review-burden, precursor-quality and
///   recall baselines.
fn build_operator_baselines(
    dataset: &PreparedDataset,
    grammar: &GrammarSet,
    baseline_evaluation: &DsaEvaluation,
) -> OperatorBaselines {
    let summary = &baseline_evaluation.summary;
    let episodes = &baseline_evaluation.episode_summary;

    let numeric_episode_count =
        episode_ranges(&baseline_evaluation.run_signals.numeric_primary_run_alert).len();
    let numeric_only_dsa = OperatorBaselineLayer {
        name: "numeric_only_dsa".into(),
        investigation_points: summary.numeric_alert_point_count,
        episode_count: numeric_episode_count,
        review_escalate_points_per_pass_run: numeric_alert_points_per_pass_run(
            dataset,
            baseline_evaluation,
        ),
        review_escalate_episodes_per_pass_run: numeric_alert_episodes_per_pass_run(
            dataset,
            baseline_evaluation,
        ),
        precursor_quality: episodes.precursor_quality,
        recall: summary.numeric_primary_failure_run_recall,
        pass_run_nuisance_proxy: summary.numeric_primary_pass_run_nuisance_proxy,
    };

    let current_policy_dsa = OperatorBaselineLayer {
        name: "current_policy_dsa".into(),
        investigation_points: summary.alert_point_count,
        episode_count: episodes.dsa_episode_count,
        review_escalate_points_per_pass_run: review_escalate_points_per_pass_run(
            dataset,
            baseline_evaluation,
        ),
        review_escalate_episodes_per_pass_run: review_escalate_episodes_per_pass_run(
            dataset,
            baseline_evaluation,
        ),
        precursor_quality: episodes.precursor_quality,
        recall: summary.failure_run_recall,
        pass_run_nuisance_proxy: summary.pass_run_nuisance_proxy,
    };

    let raw_boundary = OperatorBaselineLayer {
        name: "raw_boundary".into(),
        investigation_points: summary.raw_boundary_episode_count,
        episode_count: summary.raw_boundary_episode_count,
        review_escalate_points_per_pass_run: raw_boundary_points_per_pass_run(dataset, grammar),
        review_escalate_episodes_per_pass_run: raw_boundary_episodes_per_pass_run(
            dataset, grammar,
        ),
        precursor_quality: None,
        recall: 0,
        pass_run_nuisance_proxy: summary.raw_boundary_nuisance_proxy,
    };

    // Read the anchoring values before the layers are moved into the result.
    let investigation_baseline_layer = numeric_only_dsa.name.clone();
    let episode_baseline_layer = raw_boundary.name.clone();
    let review_burden_baseline_layer = current_policy_dsa.name.clone();
    let baseline_investigation_points = numeric_only_dsa.investigation_points;
    let baseline_episode_count = raw_boundary.episode_count;
    let baseline_review_escalate_points_per_pass_run =
        current_policy_dsa.review_escalate_points_per_pass_run;
    let baseline_review_escalate_episodes_per_pass_run =
        current_policy_dsa.review_escalate_episodes_per_pass_run;
    let baseline_precursor_quality = current_policy_dsa.precursor_quality;
    let baseline_recall = current_policy_dsa.recall;

    OperatorBaselines {
        investigation_baseline_layer,
        episode_baseline_layer,
        review_burden_baseline_layer,
        baseline_investigation_points,
        baseline_episode_count,
        baseline_review_escalate_points_per_pass_run,
        baseline_review_escalate_episodes_per_pass_run,
        baseline_precursor_quality,
        baseline_recall,
        numeric_only_dsa,
        current_policy_dsa,
        raw_boundary,
    }
}
/// Reports how the selected configuration compares with the operator baselines:
/// burden deltas, recall checks, nuisance deltas and lead-time deltas.
///
/// `precursor_quality_status` is "improved", "preserved" or "degraded" using a
/// tolerance of 1e-9, or "unavailable" when either quality value is missing.
///
/// NOTE(review): `recall_eq_104` is computed with `>= 104`, and both
/// `median_lead_delta_*` fields subtract the *mean* EWMA/threshold lead from
/// the selected *median* lead — confirm both are intended.
fn compute_operator_delta_targets(
    selected_row: &CohortGridResult,
    selected_evaluation: &DsaEvaluation,
    baselines: &OperatorBaselines,
    metrics: &BenchmarkMetrics,
) -> OperatorDeltaTargets {
    let benchmark = &metrics.summary;
    let lead_times = &metrics.lead_time_summary;
    let ewma_nuisance = benchmark.pass_run_ewma_nuisance_rate;
    let threshold_recall = benchmark.failure_runs_with_preceding_threshold_signal;

    let quality_pair = baselines
        .baseline_precursor_quality
        .zip(selected_row.precursor_quality);
    let precursor_quality_status = match quality_pair {
        None => "unavailable",
        Some((baseline, optimized)) if optimized > baseline + 1.0e-9 => "improved",
        Some((baseline, optimized)) if (optimized - baseline).abs() <= 1.0e-9 => "preserved",
        Some(_) => "degraded",
    }
    .to_string();

    let selected_configuration = delta_candidate_summary(
        selected_row,
        ewma_nuisance,
        baselines.current_policy_dsa.pass_run_nuisance_proxy,
    );
    let median_lead_delta_vs_ewma = selected_row
        .median_lead_time_runs
        .zip(lead_times.mean_ewma_lead_runs)
        .map(|(selected, ewma)| selected - ewma);
    let median_lead_delta_vs_threshold = selected_row
        .median_lead_time_runs
        .zip(lead_times.mean_threshold_lead_runs)
        .map(|(selected, threshold)| selected - threshold);

    OperatorDeltaTargets {
        primary_success_definition:
            "Failure coverage must match threshold or remain within one missed run while investigation-worthy burden is reduced materially relative to a structural baseline."
                .into(),
        recall_tolerance_runs: RECALL_TOLERANCE,
        selected_configuration,
        baseline_investigation_points: baselines.baseline_investigation_points,
        baseline_episode_count: baselines.baseline_episode_count,
        baseline_review_points_per_pass_run: baselines
            .baseline_review_escalate_points_per_pass_run,
        baseline_review_episodes_per_pass_run: baselines
            .baseline_review_escalate_episodes_per_pass_run,
        optimized_review_escalate_points: selected_row.investigation_point_count,
        optimized_episode_count: selected_row.dsa_episode_count,
        optimized_review_points_per_pass_run: selected_row.review_escalate_points_per_pass_run,
        optimized_review_episodes_per_pass_run: selected_row
            .review_escalate_episodes_per_pass_run,
        delta_investigation_load: delta_relative_count(
            baselines.baseline_investigation_points,
            selected_row.investigation_point_count,
        ),
        delta_episode_count: delta_relative_count(
            baselines.baseline_episode_count,
            selected_row.dsa_episode_count,
        ),
        delta_review_points_per_pass_run: delta_relative_f64(
            baselines.baseline_review_escalate_points_per_pass_run,
            selected_row.review_escalate_points_per_pass_run,
        ),
        delta_review_episodes_per_pass_run: delta_relative_f64(
            baselines.baseline_review_escalate_episodes_per_pass_run,
            selected_row.review_escalate_episodes_per_pass_run,
        ),
        precursor_quality_status,
        recall_equals_threshold: selected_row.failure_recall == threshold_recall,
        recall_within_tolerance: selected_row.failure_recall + RECALL_TOLERANCE
            >= threshold_recall,
        recall_ge_103: selected_row.failure_recall >= 103,
        recall_eq_104: selected_row.failure_recall >= 104,
        delta_nuisance_vs_ewma: delta_nuisance_relative(
            ewma_nuisance,
            selected_row.pass_run_nuisance_proxy,
        ),
        delta_nuisance_vs_threshold: delta_nuisance_relative(
            benchmark.pass_run_threshold_nuisance_rate,
            selected_row.pass_run_nuisance_proxy,
        ),
        mean_lead_delta_vs_ewma: selected_row.mean_lead_delta_vs_ewma_runs,
        mean_lead_delta_vs_threshold: selected_row.mean_lead_delta_vs_threshold_runs,
        median_lead_delta_vs_ewma,
        median_lead_delta_vs_threshold,
        stable_precursor_lead_time_delta: stable_precursor_lead_time_delta(selected_evaluation),
    }
}
/// Builds the target-attainment matrix: one row for the selected configuration,
/// then one for the first Stage 1 candidate and one for the first Stage 2
/// candidate when those slices are non-empty.
///
/// Each row records the four operator-burden deltas, the precursor-quality
/// status (1e-9 tolerance), recall, mean lead time, the nuisance delta versus
/// EWMA, and the boolean outcome of every target check.
/// `target_i_stable_precursor_lead_improved` is always `None`.
fn build_operator_delta_attainment_matrix(
    selected_row: &CohortGridResult,
    stage1_candidates: &[CohortGridResult],
    stage2_candidates: &[CohortGridResult],
    baselines: &OperatorBaselines,
    metrics: &BenchmarkMetrics,
) -> Vec<OperatorDeltaAttainmentRow> {
    let contenders = [
        ("selected", Some(selected_row)),
        ("stage1_best", stage1_candidates.first()),
        ("stage2_best", stage2_candidates.first()),
    ];

    let mut matrix = Vec::new();
    for (role, contender) in contenders {
        let Some(row) = contender else {
            continue;
        };

        let investigation_gain = delta_relative_count(
            baselines.baseline_investigation_points,
            row.investigation_point_count,
        );
        let episode_gain =
            delta_relative_count(baselines.baseline_episode_count, row.dsa_episode_count);
        let review_points_gain = delta_relative_f64(
            baselines.baseline_review_escalate_points_per_pass_run,
            row.review_escalate_points_per_pass_run,
        );
        let review_episodes_gain = delta_relative_f64(
            baselines.baseline_review_escalate_episodes_per_pass_run,
            row.review_escalate_episodes_per_pass_run,
        );
        let nuisance_gain = delta_nuisance_relative(
            metrics.summary.pass_run_ewma_nuisance_rate,
            row.pass_run_nuisance_proxy,
        );

        let quality_status = match baselines
            .baseline_precursor_quality
            .zip(row.precursor_quality)
        {
            None => "unavailable",
            Some((baseline, optimized)) if optimized > baseline + 1.0e-9 => "improved",
            Some((baseline, optimized)) if (optimized - baseline).abs() <= 1.0e-9 => {
                "preserved"
            }
            Some(_) => "degraded",
        };

        matrix.push(OperatorDeltaAttainmentRow {
            configuration_role: role.into(),
            configuration: row_label(row),
            delta_investigation_load: investigation_gain,
            delta_episode_count: episode_gain,
            delta_review_points_per_pass_run: review_points_gain,
            delta_review_episodes_per_pass_run: review_episodes_gain,
            precursor_quality_status: quality_status.to_string(),
            recall: row.failure_recall,
            mean_lead_time_runs: row.mean_lead_time_runs,
            delta_nuisance_vs_ewma: nuisance_gain,
            target_a_investigation_load_ge_040: investigation_gain >= OPERATOR_DELTA_THRESHOLD,
            target_b_episode_count_ge_040: episode_gain >= OPERATOR_DELTA_THRESHOLD,
            target_c_review_points_per_pass_run_ge_040: review_points_gain
                >= OPERATOR_DELTA_THRESHOLD,
            target_d_review_episodes_per_pass_run_ge_040: review_episodes_gain
                >= OPERATOR_DELTA_THRESHOLD,
            target_e_precursor_quality_preserved_or_improved: quality_status != "degraded",
            target_f_recall_ge_103: row.failure_recall >= 103,
            target_g_recall_eq_104: row.failure_recall >= 104,
            target_h_nuisance_ge_015: nuisance_gain >= 0.15,
            target_h_nuisance_ge_025: nuisance_gain >= 0.25,
            target_h_nuisance_ge_040: nuisance_gain >= 0.40,
            target_i_stable_precursor_lead_improved: None,
        });
    }
    matrix
}
/// Lists operator-burden contributions for the baseline and the optimized
/// evaluation, baseline rows first.
///
/// For each evaluation this emits, per motif contribution, three "motif" rows
/// (pass-run review/escalate points, pre-failure review/escalate points,
/// silent suppression points), followed by one "feature" row per trace whose
/// `dsa_alert` flags are set on at least one run labelled `-1` in
/// `dataset.labels`. Feature rows carry the selected row's label in `note`.
///
/// # Panics
/// Panics if a trace's `dsa_alert` has more entries than `dataset.labels`.
fn build_policy_operator_burden_contributions(
    dataset: &PreparedDataset,
    baseline: &DsaEvaluation,
    optimized: &DsaEvaluation,
    selected_row: &CohortGridResult,
) -> Vec<OperatorBurdenContributionRow> {
    let mut contributions = Vec::new();

    for (role, evaluation) in [("baseline", baseline), ("optimized", optimized)] {
        for motif in &evaluation.motif_policy_contributions {
            // (contribution type, value, note) for the three motif-level rows.
            let motif_breakdown = [
                (
                    "review_escalate_burden",
                    motif.pass_review_or_escalate_points as f64,
                    "pass-run Review/Escalate feature points",
                ),
                (
                    "pre_failure_review_escalate",
                    motif.pre_failure_review_or_escalate_points as f64,
                    "pre-failure Review/Escalate feature points",
                ),
                (
                    "silent_suppression",
                    motif.silent_suppression_points as f64,
                    "explicit silent suppression points",
                ),
            ];
            for (contribution_type, value, note) in motif_breakdown {
                contributions.push(OperatorBurdenContributionRow {
                    configuration_role: role.into(),
                    contribution_scope: "motif".into(),
                    name: motif.motif_name.clone(),
                    contribution_type: contribution_type.into(),
                    value,
                    note: note.into(),
                });
            }
        }

        for trace in &evaluation.traces {
            // Number of runs labelled -1 on which this feature raised a DSA alert.
            let pass_run_alerts = trace
                .dsa_alert
                .iter()
                .enumerate()
                .filter(|&(run_index, &alerted)| dataset.labels[run_index] == -1 && alerted)
                .count() as f64;
            if pass_run_alerts > 0.0 {
                contributions.push(OperatorBurdenContributionRow {
                    configuration_role: role.into(),
                    contribution_scope: "feature".into(),
                    name: trace.feature_name.clone(),
                    contribution_type: "pass_run_review_escalate_burden".into(),
                    value: pass_run_alerts,
                    note: format!("selected row {}", row_label(selected_row)),
                });
            }
        }
    }

    contributions
}
/// Builds the recall-recovery efficiency rows comparing `optimized` against `baseline`.
///
/// The first row has `failure_run_index: None` and carries whole-dataset deltas
/// (optimized minus baseline). Each following row covers one failure run for which the
/// baseline has no `earliest_dsa_run`; its deltas are restricted to the half-open run window
/// `[failure_run_index - pre_failure_lookback_runs, failure_run_index)`, with the start
/// saturating at zero.
///
/// Every `recovered_failures_per_added_*` field is `None` when the added cost in its
/// denominator is not strictly positive. In per-failure rows the numerator is 1 when the
/// optimized evaluation has an `earliest_dsa_run` for that failure and 0 otherwise.
fn build_recall_recovery_efficiency(
    dataset: &PreparedDataset,
    baseline: &DsaEvaluation,
    optimized: &DsaEvaluation,
    pre_failure_lookback_runs: usize,
) -> Vec<RecallRecoveryEfficiencyRow> {
    let recovered_failures =
        optimized.summary.failure_run_recall as i64 - baseline.summary.failure_run_recall as i64;
    let added_review_escalate_points =
        optimized.summary.alert_point_count as i64 - baseline.summary.alert_point_count as i64;
    let added_episode_count = optimized.episode_summary.dsa_episode_count as i64
        - baseline.episode_summary.dsa_episode_count as i64;
    let added_review_points_per_pass_run = review_escalate_points_per_pass_run(dataset, optimized)
        - review_escalate_points_per_pass_run(dataset, baseline);
    let added_review_episodes_per_pass_run =
        review_escalate_episodes_per_pass_run(dataset, optimized)
            - review_escalate_episodes_per_pass_run(dataset, baseline);

    // Number of runs labelled -1; computed once and used to scale each nuisance proxy
    // into a rounded run count.
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count() as f64;
    let baseline_pass_nuisance_runs =
        (baseline.summary.pass_run_nuisance_proxy * pass_run_count).round() as i64;
    let optimized_pass_nuisance_runs =
        (optimized.summary.pass_run_nuisance_proxy * pass_run_count).round() as i64;
    let added_nuisance_runs = optimized_pass_nuisance_runs - baseline_pass_nuisance_runs;

    // Aggregate row over the whole dataset.
    let mut rows = vec![RecallRecoveryEfficiencyRow {
        failure_run_index: None,
        baseline_configuration: "current_policy_dsa".into(),
        optimized_configuration: "optimized_policy_dsa".into(),
        recovered_failures,
        added_review_escalate_points,
        added_episode_count,
        added_review_points_per_pass_run,
        added_review_episodes_per_pass_run,
        added_nuisance_runs,
        recovered_failures_per_added_review_escalate_point: ratio_if_positive(
            recovered_failures,
            added_review_escalate_points,
        ),
        recovered_failures_per_added_episode: ratio_if_positive(
            recovered_failures,
            added_episode_count,
        ),
        recovered_failures_per_added_pass_run_burden: if added_review_points_per_pass_run > 0.0 {
            Some(recovered_failures as f64 / added_review_points_per_pass_run)
        } else {
            None
        },
        recovered_failures_per_added_nuisance_run: ratio_if_positive(
            recovered_failures,
            added_nuisance_runs,
        ),
    }];

    let optimized_by_failure = optimized
        .per_failure_run_signals
        .iter()
        .map(|row| (row.failure_run_index, row))
        .collect::<BTreeMap<_, _>>();

    // One row per failure run that the baseline did not flag at all.
    for failure_signal in baseline
        .per_failure_run_signals
        .iter()
        .filter(|row| row.earliest_dsa_run.is_none())
    {
        let failure_run_index = failure_signal.failure_run_index;
        let start = failure_run_index.saturating_sub(pre_failure_lookback_runs);
        let end = failure_run_index;

        let baseline_review_points = review_escalate_points_in_window(baseline, start, end) as i64;
        let optimized_review_points =
            review_escalate_points_in_window(optimized, start, end) as i64;
        let added_window_review_points = optimized_review_points - baseline_review_points;

        let baseline_pass_review_points =
            review_escalate_points_in_pass_window(dataset, baseline, start, end);
        let optimized_pass_review_points =
            review_escalate_points_in_pass_window(dataset, optimized, start, end);

        let baseline_episode_count =
            primary_episode_count_in_window(&baseline.run_signals.primary_run_alert, start, end)
                as i64;
        let optimized_episode_count =
            primary_episode_count_in_window(&optimized.run_signals.primary_run_alert, start, end)
                as i64;
        let added_window_episodes = optimized_episode_count - baseline_episode_count;

        let pass_runs_in_window = dataset.labels[start..end]
            .iter()
            .filter(|label| **label == -1)
            .count();
        let added_review_points_per_pass_run = if pass_runs_in_window > 0 {
            (optimized_pass_review_points as f64 - baseline_pass_review_points as f64)
                / pass_runs_in_window as f64
        } else {
            0.0
        };

        let baseline_review_episodes_per_pass_run =
            review_escalate_episodes_per_pass_run_in_window(dataset, baseline, start, end);
        let optimized_review_episodes_per_pass_run =
            review_escalate_episodes_per_pass_run_in_window(dataset, optimized, start, end);
        let added_review_episodes_per_pass_run =
            optimized_review_episodes_per_pass_run - baseline_review_episodes_per_pass_run;

        let baseline_nuisance_runs =
            primary_nuisance_runs_in_window(dataset, baseline, start, end) as i64;
        let optimized_nuisance_runs =
            primary_nuisance_runs_in_window(dataset, optimized, start, end) as i64;
        let added_window_nuisance_runs = optimized_nuisance_runs - baseline_nuisance_runs;

        let recovered = optimized_by_failure
            .get(&failure_run_index)
            .is_some_and(|row| row.earliest_dsa_run.is_some());
        let recovered_count = i64::from(recovered);

        rows.push(RecallRecoveryEfficiencyRow {
            failure_run_index: Some(failure_run_index),
            baseline_configuration: "current_policy_dsa".into(),
            optimized_configuration: "optimized_policy_dsa".into(),
            recovered_failures: recovered_count,
            added_review_escalate_points: added_window_review_points,
            added_episode_count: added_window_episodes,
            added_review_points_per_pass_run,
            added_review_episodes_per_pass_run,
            added_nuisance_runs: added_window_nuisance_runs,
            recovered_failures_per_added_review_escalate_point: ratio_if_positive(
                recovered_count,
                added_window_review_points,
            ),
            recovered_failures_per_added_episode: ratio_if_positive(
                recovered_count,
                added_window_episodes,
            ),
            // Use the actual recovered count (0 or 1) as the numerator so that a failure
            // that stayed unrecovered does not report a positive efficiency, matching the
            // other ratio fields of this row.
            recovered_failures_per_added_pass_run_burden: if added_review_points_per_pass_run > 0.0
            {
                Some(recovered_count as f64 / added_review_points_per_pass_run)
            } else {
                None
            },
            recovered_failures_per_added_nuisance_run: ratio_if_positive(
                recovered_count,
                added_window_nuisance_runs,
            ),
        });
    }
    rows
}
/// Returns `numerator / denominator` as `f64`, or `None` unless `denominator` is
/// strictly positive.
fn ratio_if_positive(numerator: i64, denominator: i64) -> Option<f64> {
    if denominator > 0 {
        Some(numerator as f64 / denominator as f64)
    } else {
        None
    }
}
/// Averages `dsa_lead_runs` over the failure-run signals whose
/// `max_dsa_score_motif_recurrence_w` (treated as 0.0 when absent) is at least 0.5.
///
/// Signals without a `dsa_lead_runs` value are ignored. Returns `None` when no signal
/// qualifies.
fn stable_precursor_lead_time_delta(selected_evaluation: &DsaEvaluation) -> Option<f64> {
    let mut qualifying_leads: Vec<f64> = Vec::new();
    for signal in &selected_evaluation.per_failure_run_signals {
        let recurrence = signal.max_dsa_score_motif_recurrence_w.unwrap_or(0.0);
        if recurrence >= 0.5 {
            if let Some(lead) = signal.dsa_lead_runs {
                qualifying_leads.push(lead as f64);
            }
        }
    }
    if qualifying_leads.is_empty() {
        return None;
    }
    let total = qualifying_leads.iter().sum::<f64>();
    Some(total / qualifying_leads.len() as f64)
}
/// Counts the set `dsa_alert` flags across all traces within the run window `[start, end)`.
fn review_escalate_points_in_window(evaluation: &DsaEvaluation, start: usize, end: usize) -> usize {
    let mut total = 0usize;
    for trace in &evaluation.traces {
        let window = &trace.dsa_alert[start..end];
        total += window.iter().filter(|flag| **flag).count();
    }
    total
}
/// Counts the set `dsa_alert` flags across all traces within `[start, end)`, keeping only
/// runs whose dataset label is `-1`.
fn review_escalate_points_in_pass_window(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
    start: usize,
    end: usize,
) -> usize {
    let mut total = 0usize;
    for trace in &evaluation.traces {
        for (offset, flag) in trace.dsa_alert[start..end].iter().enumerate() {
            // `offset` is relative to the window, so shift it back to a dataset run index.
            if dataset.labels[start + offset] == -1 && *flag {
                total += 1;
            }
        }
    }
    total
}
/// Divides the number of primary-run-alert episodes overlapping `[start, end)` by the
/// number of runs labelled `-1` in that window.
///
/// Returns `0.0` when the window contains no run labelled `-1`.
fn review_escalate_episodes_per_pass_run_in_window(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
    start: usize,
    end: usize,
) -> f64 {
    let window_labels = &dataset.labels[start..end];
    let pass_runs = window_labels.iter().filter(|label| **label == -1).count();
    if pass_runs > 0 {
        let episodes =
            primary_episode_count_in_window(&evaluation.run_signals.primary_run_alert, start, end);
        episodes as f64 / pass_runs as f64
    } else {
        0.0
    }
}
/// Counts the episodes of `signal` (as produced by `episode_ranges`, with inclusive ends)
/// that overlap the half-open window `[start, end)`.
fn primary_episode_count_in_window(signal: &[bool], start: usize, end: usize) -> usize {
    let mut overlapping = 0usize;
    for (episode_start, episode_end) in episode_ranges(signal) {
        let begins_before_window_end = episode_start < end;
        let ends_at_or_after_window_start = episode_end >= start;
        if begins_before_window_end && ends_at_or_after_window_start {
            overlapping += 1;
        }
    }
    overlapping
}
/// Counts the runs in `[start, end)` that are labelled `-1` and have `primary_run_alert` set.
fn primary_nuisance_runs_in_window(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
    start: usize,
    end: usize,
) -> usize {
    let window = &evaluation.run_signals.primary_run_alert[start..end];
    let mut nuisance_runs = 0usize;
    for (offset, flag) in window.iter().enumerate() {
        // `offset` is window-relative; `start + offset` is the dataset run index.
        if dataset.labels[start + offset] == -1 && *flag {
            nuisance_runs += 1;
        }
    }
    nuisance_runs
}
/// Splits `signal` into maximal runs of consecutive `true` values.
///
/// Each run is returned as `(first_index, last_index)` with both ends inclusive, in
/// ascending order. A run still open at the end of `signal` is closed at the last index.
fn episode_ranges(signal: &[bool]) -> Vec<(usize, usize)> {
    let mut ranges = Vec::new();
    let mut open_since: Option<usize> = None;
    for (index, &active) in signal.iter().enumerate() {
        if active {
            if open_since.is_none() {
                open_since = Some(index);
            }
        } else if let Some(begin) = open_since.take() {
            // An episode can only be open if an earlier index was active, so index >= 1.
            ranges.push((begin, index - 1));
        }
    }
    if let Some(begin) = open_since {
        ranges.push((begin, signal.len().saturating_sub(1)));
    }
    ranges
}
/// Relative reduction from `baseline` to `optimized`, i.e. `(baseline - optimized) / baseline`.
///
/// Returns `0.0` when `baseline` is zero. The result is negative when `optimized` exceeds
/// `baseline`.
fn delta_relative_count(baseline: usize, optimized: usize) -> f64 {
    match baseline {
        0 => 0.0,
        nonzero => {
            let base = nonzero as f64;
            (base - optimized as f64) / base
        }
    }
}
/// Relative reduction from `baseline` to `optimized`, i.e. `(baseline - optimized) / baseline`.
///
/// Returns `0.0` when `|baseline|` is at most `f64::EPSILON`.
fn delta_relative_f64(baseline: f64, optimized: f64) -> f64 {
    // Guard phrased as `<=` on purpose: a NaN baseline fails the test and falls through
    // to the division.
    if baseline.abs() <= f64::EPSILON {
        return 0.0;
    }
    let reduction = baseline - optimized;
    reduction / baseline
}
/// Average number of set `dsa_alert` points (summed over all traces) per run labelled `-1`.
///
/// Only points on runs labelled `-1` are counted. Returns `0.0` when no run has that label.
fn review_escalate_points_per_pass_run(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
) -> f64 {
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
    if pass_run_count == 0 {
        return 0.0;
    }
    let mut points = 0usize;
    for trace in &evaluation.traces {
        for (run_index, flag) in trace.dsa_alert.iter().enumerate() {
            if dataset.labels[run_index] == -1 && *flag {
                points += 1;
            }
        }
    }
    points as f64 / pass_run_count as f64
}
/// Average number of set `numeric_dsa_alert` points (summed over all traces) per run
/// labelled `-1`.
///
/// Only points on runs labelled `-1` are counted. Returns `0.0` when no run has that label.
fn numeric_alert_points_per_pass_run(dataset: &PreparedDataset, evaluation: &DsaEvaluation) -> f64 {
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
    if pass_run_count == 0 {
        return 0.0;
    }
    let points = evaluation
        .traces
        .iter()
        .flat_map(|trace| trace.numeric_dsa_alert.iter().enumerate())
        .filter(|(run_index, flag)| dataset.labels[*run_index] == -1 && **flag)
        .count();
    points as f64 / pass_run_count as f64
}
/// Number of `primary_run_alert` episodes, after masking out every run not labelled `-1`,
/// divided by the number of runs labelled `-1`.
///
/// Returns `0.0` when no run has that label.
fn review_escalate_episodes_per_pass_run(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
) -> f64 {
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
    if pass_run_count == 0 {
        return 0.0;
    }
    let alerts = &evaluation.run_signals.primary_run_alert;
    let mut pass_alert_mask = Vec::with_capacity(alerts.len());
    for (run_index, flag) in alerts.iter().enumerate() {
        pass_alert_mask.push(dataset.labels[run_index] == -1 && *flag);
    }
    let episode_count = episode_ranges(&pass_alert_mask).len();
    episode_count as f64 / pass_run_count as f64
}
/// Number of `numeric_primary_run_alert` episodes, after masking out every run not
/// labelled `-1`, divided by the number of runs labelled `-1`.
///
/// Returns `0.0` when no run has that label.
fn numeric_alert_episodes_per_pass_run(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
) -> f64 {
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
    if pass_run_count == 0 {
        0.0
    } else {
        let numeric_alerts = &evaluation.run_signals.numeric_primary_run_alert;
        let pass_only: Vec<bool> = numeric_alerts
            .iter()
            .enumerate()
            .map(|(run_index, flag)| dataset.labels[run_index] == -1 && *flag)
            .collect();
        let episodes = episode_ranges(&pass_only);
        episodes.len() as f64 / pass_run_count as f64
    }
}
/// Average number of raw `GrammarState::Boundary` points (summed over all grammar traces)
/// per run labelled `-1`.
///
/// Only points on runs labelled `-1` are counted. Returns `0.0` when no run has that label.
fn raw_boundary_points_per_pass_run(dataset: &PreparedDataset, grammar: &GrammarSet) -> f64 {
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
    if pass_run_count == 0 {
        return 0.0;
    }
    let mut boundary_points = 0usize;
    for trace in &grammar.traces {
        for (run_index, state) in trace.raw_states.iter().enumerate() {
            let on_pass_run = dataset.labels[run_index] == -1;
            if on_pass_run && *state == crate::grammar::GrammarState::Boundary {
                boundary_points += 1;
            }
        }
    }
    boundary_points as f64 / pass_run_count as f64
}
/// Number of raw `GrammarState::Boundary` episodes (per trace, after masking out every run
/// not labelled `-1`, summed over all grammar traces) divided by the number of runs
/// labelled `-1`.
///
/// Returns `0.0` when no run has that label.
fn raw_boundary_episodes_per_pass_run(dataset: &PreparedDataset, grammar: &GrammarSet) -> f64 {
    let pass_run_count = dataset.labels.iter().filter(|label| **label == -1).count();
    if pass_run_count == 0 {
        return 0.0;
    }
    let mut episode_count = 0usize;
    for trace in &grammar.traces {
        let mut pass_boundary_mask = Vec::with_capacity(trace.raw_states.len());
        for (run_index, state) in trace.raw_states.iter().enumerate() {
            pass_boundary_mask.push(
                dataset.labels[run_index] == -1
                    && *state == crate::grammar::GrammarState::Boundary,
            );
        }
        episode_count += episode_ranges(&pass_boundary_mask).len();
    }
    episode_count as f64 / pass_run_count as f64
}
/// Maps each trace's `feature_index` to the number of its set `dsa_alert` points that fall
/// on runs labelled `-1`.
///
/// If two traces share a `feature_index`, the later trace's value replaces the earlier one.
fn operator_burden_contribution_by_feature(
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
) -> BTreeMap<usize, f64> {
    let mut burden_by_feature = BTreeMap::new();
    for trace in &evaluation.traces {
        let mut pass_alert_points = 0usize;
        for (run_index, flag) in trace.dsa_alert.iter().enumerate() {
            if dataset.labels[run_index] == -1 && *flag {
                pass_alert_points += 1;
            }
        }
        burden_by_feature.insert(trace.feature_index, pass_alert_points as f64);
    }
    burden_by_feature
}
/// Summarises a grid row as a `DeltaCandidateSummary`, adding its relative nuisance
/// reduction against the EWMA nuisance and against the current policy DSA nuisance.
fn delta_candidate_summary(
    row: &CohortGridResult,
    ewma_nuisance: f64,
    current_policy_dsa_nuisance: f64,
) -> DeltaCandidateSummary {
    let configuration = row_label(row);
    let candidate_nuisance = row.pass_run_nuisance_proxy;
    let delta_nuisance_vs_ewma = delta_nuisance_relative(ewma_nuisance, candidate_nuisance);
    let delta_nuisance_vs_current_dsa =
        delta_nuisance_relative(current_policy_dsa_nuisance, candidate_nuisance);
    DeltaCandidateSummary {
        configuration,
        ranking_strategy: row.ranking_strategy.clone(),
        cohort_name: row.cohort_name.clone(),
        window: row.window,
        persistence_runs: row.persistence_runs,
        alert_tau: row.alert_tau,
        corroborating_m: row.corroborating_m,
        failure_recall: row.failure_recall,
        failure_runs: row.failure_runs,
        pass_run_nuisance_proxy: candidate_nuisance,
        delta_nuisance_vs_ewma,
        delta_nuisance_vs_current_dsa,
        mean_lead_time_runs: row.mean_lead_time_runs,
        precursor_quality: row.precursor_quality,
        compression_ratio: row.compression_ratio,
    }
}
/// Relative nuisance reduction: `(baseline_nuisance - dsa_nuisance) / baseline_nuisance`.
///
/// Returns `0.0` when `|baseline_nuisance|` is at most `f64::EPSILON`.
fn delta_nuisance_relative(baseline_nuisance: f64, dsa_nuisance: f64) -> f64 {
    let baseline_is_negligible = baseline_nuisance.abs() <= f64::EPSILON;
    if baseline_is_negligible {
        return 0.0;
    }
    let reduction = baseline_nuisance - dsa_nuisance;
    reduction / baseline_nuisance
}
/// Writes every cohort grid result to a CSV file at `path`.
///
/// The file starts with a header record followed by one record per entry of `results`.
/// Columns are positional: the header array and the per-row array below must list their
/// fields in the same order.
///
/// Floating-point fields are written with six decimal places; optional fields go through
/// `format_option_csv`.
///
/// # Errors
///
/// Propagates any error from creating the writer, writing a record, or flushing.
pub fn write_cohort_results_csv(path: &Path, results: &[CohortGridResult]) -> Result<()> {
let mut writer = Writer::from_path(path)?;
// Header record; keep in lock-step with the row record written in the loop below.
writer.write_record([
"ranking_strategy",
"ranking_formula",
"grid_row_id",
"feature_trace_config_id",
"cohort_name",
"cohort_size",
"window",
"persistence_runs",
"alert_tau",
"corroborating_m",
"primary_run_signal",
"failure_recall",
"failure_runs",
"failure_recall_rate",
"threshold_recall",
"ewma_recall",
"failure_recall_delta_vs_threshold",
"failure_recall_delta_vs_ewma",
"mean_lead_time_runs",
"median_lead_time_runs",
"threshold_mean_lead_time_runs",
"ewma_mean_lead_time_runs",
"mean_lead_delta_vs_threshold_runs",
"mean_lead_delta_vs_ewma_runs",
"pass_run_nuisance_proxy",
"numeric_pass_run_nuisance_proxy",
"ewma_nuisance",
"threshold_nuisance",
"pass_run_nuisance_delta_vs_ewma",
"pass_run_nuisance_delta_vs_threshold",
"pass_run_nuisance_delta_vs_numeric_dsa",
"raw_boundary_episode_count",
"dsa_episode_count",
"dsa_episodes_preceding_failure",
"mean_dsa_episode_length_runs",
"max_dsa_episode_length_runs",
"compression_ratio",
"precursor_quality",
"non_escalating_dsa_episode_fraction",
"feature_level_active_points",
"feature_level_alert_points",
"persistence_suppression_fraction",
"numeric_failure_recall",
"policy_vs_numeric_recall_delta",
"watch_point_count",
"review_point_count",
"escalate_point_count",
"silenced_point_count",
"rescued_point_count",
"rescued_watch_to_review_points",
"rescued_review_to_escalate_points",
"primary_success",
"primary_success_reason",
])?;
for row in results {
// One record per grid result, in header order.
writer.write_record([
row.ranking_strategy.clone(),
row.ranking_formula.clone(),
row.grid_row_id.to_string(),
row.feature_trace_config_id.to_string(),
row.cohort_name.clone(),
row.cohort_size.to_string(),
row.window.to_string(),
row.persistence_runs.to_string(),
format!("{:.6}", row.alert_tau),
row.corroborating_m.to_string(),
row.primary_run_signal.clone(),
row.failure_recall.to_string(),
row.failure_runs.to_string(),
format!("{:.6}", row.failure_recall_rate),
row.threshold_recall.to_string(),
row.ewma_recall.to_string(),
row.failure_recall_delta_vs_threshold.to_string(),
row.failure_recall_delta_vs_ewma.to_string(),
format_option_csv(row.mean_lead_time_runs),
format_option_csv(row.median_lead_time_runs),
format_option_csv(row.threshold_mean_lead_time_runs),
format_option_csv(row.ewma_mean_lead_time_runs),
format_option_csv(row.mean_lead_delta_vs_threshold_runs),
format_option_csv(row.mean_lead_delta_vs_ewma_runs),
format!("{:.6}", row.pass_run_nuisance_proxy),
format!("{:.6}", row.numeric_pass_run_nuisance_proxy),
format!("{:.6}", row.ewma_nuisance),
format!("{:.6}", row.threshold_nuisance),
format!("{:.6}", row.pass_run_nuisance_delta_vs_ewma),
format!("{:.6}", row.pass_run_nuisance_delta_vs_threshold),
format!("{:.6}", row.pass_run_nuisance_delta_vs_numeric_dsa),
row.raw_boundary_episode_count.to_string(),
row.dsa_episode_count.to_string(),
row.dsa_episodes_preceding_failure.to_string(),
format_option_csv(row.mean_dsa_episode_length_runs),
row.max_dsa_episode_length_runs.to_string(),
format_option_csv(row.compression_ratio),
format_option_csv(row.precursor_quality),
format_option_csv(row.non_escalating_dsa_episode_fraction),
row.feature_level_active_points.to_string(),
row.feature_level_alert_points.to_string(),
format_option_csv(row.persistence_suppression_fraction),
row.numeric_failure_recall.to_string(),
row.policy_vs_numeric_recall_delta.to_string(),
row.watch_point_count.to_string(),
row.review_point_count.to_string(),
row.escalate_point_count.to_string(),
row.silenced_point_count.to_string(),
row.rescued_point_count.to_string(),
row.rescued_watch_to_review_points.to_string(),
row.rescued_review_to_escalate_points.to_string(),
row.primary_success.to_string(),
row.primary_success_reason.clone(),
])?;
}
// Flush explicitly so that write errors are reported to the caller.
writer.flush()?;
Ok(())
}
/// Writes the per-motif policy contribution rows to a CSV file at `path`.
///
/// The file holds a header record followed by one record per entry of `rows`.
/// `alert_tau` is written with six decimal places and `alert_class_default` with its
/// `Debug` representation.
///
/// # Errors
///
/// Propagates any error from creating the writer, writing a record, or flushing.
pub fn write_motif_policy_contributions_csv(
    path: &Path,
    rows: &[CohortMotifPolicyContributionRow],
) -> Result<()> {
    // Column names; the per-row record below follows the same order.
    let header = [
        "grid_row_id",
        "cohort_name",
        "cohort_size",
        "window",
        "persistence_runs",
        "alert_tau",
        "corroborating_m",
        "motif_name",
        "alert_class_default",
        "watch_points",
        "review_points",
        "escalate_points",
        "silent_suppression_points",
        "pass_review_or_escalate_points",
        "pre_failure_review_or_escalate_points",
    ];
    let mut csv_out = Writer::from_path(path)?;
    csv_out.write_record(header)?;
    for contribution in rows {
        let record = [
            contribution.grid_row_id.to_string(),
            contribution.cohort_name.clone(),
            contribution.cohort_size.to_string(),
            contribution.window.to_string(),
            contribution.persistence_runs.to_string(),
            format!("{:.6}", contribution.alert_tau),
            contribution.corroborating_m.to_string(),
            contribution.motif_name.clone(),
            format!("{:?}", contribution.alert_class_default),
            contribution.watch_points.to_string(),
            contribution.review_points.to_string(),
            contribution.escalate_points.to_string(),
            contribution.silent_suppression_points.to_string(),
            contribution.pass_review_or_escalate_points.to_string(),
            contribution.pre_failure_review_or_escalate_points.to_string(),
        ];
        csv_out.write_record(record)?;
    }
    csv_out.flush()?;
    Ok(())
}
/// Writes the precursor-quality view of the cohort grid results to a CSV file at `path`.
///
/// The file holds a header record followed by one record per entry of `results`, limited
/// to the grid parameters, episode counts, precursor quality and compression ratio.
///
/// # Errors
///
/// Propagates any error from creating the writer, writing a record, or flushing.
pub fn write_precursor_quality_csv(path: &Path, results: &[CohortGridResult]) -> Result<()> {
    // Column names; the per-row record below follows the same order.
    let header = [
        "cohort_name",
        "window",
        "persistence_runs",
        "alert_tau",
        "corroborating_m",
        "raw_boundary_episode_count",
        "dsa_episode_count",
        "dsa_episodes_preceding_failure",
        "precursor_quality",
        "compression_ratio",
    ];
    let mut csv_out = Writer::from_path(path)?;
    csv_out.write_record(header)?;
    for result in results {
        let record = [
            result.cohort_name.clone(),
            result.window.to_string(),
            result.persistence_runs.to_string(),
            format!("{:.6}", result.alert_tau),
            result.corroborating_m.to_string(),
            result.raw_boundary_episode_count.to_string(),
            result.dsa_episode_count.to_string(),
            result.dsa_episodes_preceding_failure.to_string(),
            format_option_csv(result.precursor_quality),
            format_option_csv(result.compression_ratio),
        ];
        csv_out.write_record(record)?;
    }
    csv_out.flush()?;
    Ok(())
}
/// Renders `analysis` as a Markdown document and writes it to `path`.
///
/// The document has a fixed set of headed sections; the format arguments are positional
/// and must stay in the same order as the placeholders in the template.
///
/// # Errors
///
/// Propagates any error from `std::fs::write`.
pub fn write_failure_analysis_md(path: &Path, analysis: &CohortFailureAnalysis) -> Result<()> {
// A trailing `\` in the literal continues the string on the next source line.
let content = format!(
"# DSA Cohort Failure Analysis\n\n\
## Closest near-success configuration\n\n\
- Cohort: {}\n\
- Grid point: {}\n\
- Policy setting: {}\n\
- Nuisance: {:.6}\n\
- Recall: {}\n\
- EWMA nuisance target: {:.6}\n\
- Threshold recall target: {}\n\n\
## Limiting factor\n\n\
{}\n\n\
## Cross-feature corroboration effect\n\n\
{}\n\n\
## Policy vs numeric-only DSA\n\n\
{}\n\n\
## Ranking quality\n\n\
{}\n\n\
## All-feature DSA vs cohort DSA\n\n\
{}\n\n\
## Motif classes most responsible for nuisance\n\n\
{}\n\n\
## Motif classes most responsible for useful precursor episodes\n\n\
{}\n\n\
## Best near-success source\n\n\
{}\n",
analysis.closest_cohort,
analysis.closest_grid_point,
analysis.closest_policy_setting,
analysis.closest_nuisance,
analysis.closest_recall,
analysis.ewma_nuisance,
analysis.threshold_recall,
analysis.limiting_factor,
analysis.corroboration_effect,
analysis.policy_vs_numeric_note,
analysis.ranking_quality_note,
analysis.all_feature_dsa_vs_cohort_note,
analysis.nuisance_motif_classes,
analysis.useful_precursor_motif_classes,
analysis.best_near_success_source,
);
std::fs::write(path, content)?;
Ok(())
}
/// Writes the heuristic-policy failure analysis as Markdown to `path`.
///
/// Delegates entirely to `write_failure_analysis_md`; the output format is identical.
///
/// # Errors
///
/// Propagates any error returned by `write_failure_analysis_md`.
pub fn write_heuristic_policy_failure_analysis_md(
    path: &Path,
    analysis: &CohortFailureAnalysis,
) -> Result<()> {
    write_failure_analysis_md(path, analysis)?;
    Ok(())
}
/// Builds the rating-delta forecast for the chosen DSA configuration.
///
/// The chosen configuration is the cohort summary's `selected_configuration` when one is
/// available; otherwise a fallback row is derived from `dsa` and `metrics`. The
/// best all-features row is resolved the same way via `best_all_features_row`.
///
/// Primary success requires the chosen nuisance to be strictly below the EWMA nuisance and
/// the chosen recall plus `RECALL_TOLERANCE` to reach the threshold recall. The secondary
/// targets that gate the score are the two lead-time comparisons only; the
/// precursor-quality and compression comparisons are computed and reported but do not
/// affect `secondary_targets_met`.
pub fn compute_rating_delta_forecast(
dsa: &DsaEvaluation,
metrics: &BenchmarkMetrics,
cohort_summary: Option<&CohortDsaSummary>,
) -> RatingDeltaForecast {
// Prefer the cohort-selected row; fall back to a row built from the plain DSA evaluation.
let chosen = cohort_summary
.and_then(|summary| summary.selected_configuration.as_ref())
.cloned()
.unwrap_or_else(|| fallback_row_from_dsa(dsa, metrics));
let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;
let best_all_features = cohort_summary
.and_then(best_all_features_row)
.cloned()
.unwrap_or_else(|| fallback_row_from_dsa(dsa, metrics));
let primary_success_met = chosen.pass_run_nuisance_proxy < ewma_nuisance
&& chosen.failure_recall + RECALL_TOLERANCE >= threshold_recall;
let secondary_lead_time_vs_ewma = paired_ge(
chosen.mean_lead_time_runs,
metrics.lead_time_summary.mean_ewma_lead_runs,
);
let secondary_lead_time_vs_threshold = paired_ge(
chosen.mean_lead_time_runs,
metrics.lead_time_summary.mean_threshold_lead_runs,
);
let secondary_precursor_quality_vs_all_feature_dsa = compare_option_gt(
chosen.precursor_quality,
best_all_features.precursor_quality,
);
let secondary_compression_material = chosen.compression_ratio.map(|ratio| ratio > 1.0);
// Only the two lead-time comparisons gate the secondary outcome.
let secondary_targets_met = secondary_lead_time_vs_ewma && secondary_lead_time_vs_threshold;
// Pick the forecast score and its justification text from four mutually exclusive cases:
// primary + secondary, primary only, nuisance improved but recall short, and neither.
let (achieved_forecast_score, forecast_justification) = if primary_success_met
&& secondary_targets_met
{
(
FORECAST_PRIMARY_PLUS_SECONDARY,
format!(
"Primary success met for {}: nuisance {:.4} < EWMA {:.4}, recall {} >= threshold {} - {}. Mean lead {} is at least EWMA {} and threshold {}.",
row_label(&chosen),
chosen.pass_run_nuisance_proxy,
ewma_nuisance,
chosen.failure_recall,
threshold_recall,
RECALL_TOLERANCE,
format_option_f64(chosen.mean_lead_time_runs),
format_option_f64(metrics.lead_time_summary.mean_ewma_lead_runs),
format_option_f64(metrics.lead_time_summary.mean_threshold_lead_runs),
),
)
} else if primary_success_met {
(
FORECAST_PRIMARY_ONLY,
format!(
"Primary success met for {}: nuisance {:.4} < EWMA {:.4}, recall {} >= threshold {} - {}. Mean lead {} does not meet both secondary lead-time targets.",
row_label(&chosen),
chosen.pass_run_nuisance_proxy,
ewma_nuisance,
chosen.failure_recall,
threshold_recall,
RECALL_TOLERANCE,
format_option_f64(chosen.mean_lead_time_runs),
),
)
} else if chosen.pass_run_nuisance_proxy < ewma_nuisance {
// Primary success failed although nuisance improved, so the recall condition is the one
// that was not met.
(
FORECAST_RECALL_SHORTFALL_VALUE,
format!(
"Nuisance improved for {} ({:.4} < EWMA {:.4}) but recall {} is below threshold {} - {}.",
row_label(&chosen),
chosen.pass_run_nuisance_proxy,
ewma_nuisance,
chosen.failure_recall,
threshold_recall,
RECALL_TOLERANCE,
),
)
} else {
(
CURRENT_BASELINE_SCORE,
format!(
"Primary success condition not met for {}. Nuisance {:.4} vs EWMA {:.4}; recall {} vs threshold {} - {}.",
row_label(&chosen),
chosen.pass_run_nuisance_proxy,
ewma_nuisance,
chosen.failure_recall,
threshold_recall,
RECALL_TOLERANCE,
),
)
};
RatingDeltaForecast {
current_baseline_score: CURRENT_BASELINE_SCORE,
primary_success_condition: rating_primary_success_condition(),
recall_tolerance_runs: RECALL_TOLERANCE,
chosen_configuration: row_label(&chosen),
primary_success_met,
secondary_targets_met,
secondary_lead_time_vs_ewma,
secondary_lead_time_vs_threshold,
secondary_precursor_quality_vs_all_feature_dsa,
secondary_compression_material,
forecast_score_if_primary_success_only: FORECAST_PRIMARY_ONLY,
forecast_score_if_primary_plus_secondary_success: FORECAST_PRIMARY_PLUS_SECONDARY,
achieved_forecast_score,
forecast_justification,
category_forecasts: build_category_forecasts(primary_success_met, secondary_targets_met),
supporting_metrics: ForecastSupportingMetrics {
chosen_configuration: row_label(&chosen),
dsa_nuisance: chosen.pass_run_nuisance_proxy,
ewma_nuisance,
dsa_recall: chosen.failure_recall,
threshold_recall,
recall_tolerance_runs: RECALL_TOLERANCE,
dsa_mean_lead_time_runs: chosen.mean_lead_time_runs,
ewma_mean_lead_time_runs: metrics.lead_time_summary.mean_ewma_lead_runs,
threshold_mean_lead_time_runs: metrics.lead_time_summary.mean_threshold_lead_runs,
dsa_precursor_quality: chosen.precursor_quality,
all_feature_dsa_precursor_quality: best_all_features.precursor_quality,
dsa_compression_ratio: chosen.compression_ratio,
all_feature_dsa_compression_ratio: best_all_features.compression_ratio,
},
}
}
/// Explains why the chosen configuration missed the primary success condition.
///
/// The chosen configuration is the cohort summary's `selected_configuration` when one is
/// available; otherwise a fallback row is derived from `dsa` and `metrics`.
///
/// Returns `None` when primary success is met, i.e. the chosen nuisance is strictly below
/// the EWMA nuisance and the chosen recall plus `RECALL_TOLERANCE` reaches the threshold
/// recall. Otherwise returns the nuisance gap (clamped at zero), the recall gap in runs,
/// and the limiting factor reported by `determine_rating_limiting_factor`.
pub fn compute_rating_failure_analysis(
    dsa: &DsaEvaluation,
    metrics: &BenchmarkMetrics,
    cohort_summary: Option<&CohortDsaSummary>,
) -> Option<RatingDeltaFailureAnalysis> {
    let selected = cohort_summary.and_then(|summary| summary.selected_configuration.as_ref());
    let chosen = match selected {
        Some(row) => row.clone(),
        None => fallback_row_from_dsa(dsa, metrics),
    };
    let ewma_nuisance = metrics.summary.pass_run_ewma_nuisance_rate;
    let threshold_recall = metrics.summary.failure_runs_with_preceding_threshold_signal;

    let nuisance_target_met = chosen.pass_run_nuisance_proxy < ewma_nuisance;
    let recall_target_met = chosen.failure_recall + RECALL_TOLERANCE >= threshold_recall;
    if nuisance_target_met && recall_target_met {
        return None;
    }

    let nuisance_gap = (chosen.pass_run_nuisance_proxy - ewma_nuisance).max(0.0);
    let recall_gap_runs = if recall_target_met {
        0
    } else {
        (threshold_recall - RECALL_TOLERANCE - chosen.failure_recall) as i64
    };
    let nuisance_missed_by = if nuisance_gap == 0.0 {
        "no miss; nuisance target was met"
    } else if nuisance_gap <= 0.01 {
        "small margin"
    } else {
        "large margin"
    };
    let limiting_factor =
        determine_rating_limiting_factor(cohort_summary, &chosen, ewma_nuisance, threshold_recall);

    Some(RatingDeltaFailureAnalysis {
        closest_configuration: row_label(&chosen),
        dsa_nuisance: chosen.pass_run_nuisance_proxy,
        ewma_nuisance,
        dsa_recall: chosen.failure_recall,
        threshold_recall,
        recall_tolerance_runs: RECALL_TOLERANCE,
        nuisance_gap,
        recall_gap_runs,
        nuisance_missed_by: nuisance_missed_by.into(),
        recall_preserved: recall_gap_runs == 0,
        limiting_factor,
    })
}
/// Renders a rating-delta failure analysis as Markdown and writes it to `path`.
///
/// The format arguments are positional and must stay in the same order as the
/// placeholders in the template.
///
/// # Errors
///
/// Propagates any error from `std::fs::write`.
pub fn write_rating_failure_analysis_md(
path: &Path,
analysis: &RatingDeltaFailureAnalysis,
) -> Result<()> {
// A trailing `\` in the literal continues the string on the next source line.
let content = format!(
"# DSA Rating Delta Failure Analysis\n\n\
## Closest near-success configuration\n\n\
- Configuration: {}\n\
- DSA nuisance: {:.6}\n\
- EWMA nuisance: {:.6}\n\
- DSA recall: {}\n\
- Threshold recall: {}\n\
- Recall tolerance: {} run(s)\n\n\
## Nuisance\n\n\
- Gap from EWMA: {:.6}\n\
- Missed by: {}\n\n\
## Recall\n\n\
- Recall gap from threshold - tolerance: {}\n\
- Recall preserved: {}\n\n\
## Limiting factor\n\n\
{}\n",
analysis.closest_configuration,
analysis.dsa_nuisance,
analysis.ewma_nuisance,
analysis.dsa_recall,
analysis.threshold_recall,
analysis.recall_tolerance_runs,
analysis.nuisance_gap,
analysis.nuisance_missed_by,
analysis.recall_gap_runs,
analysis.recall_preserved,
analysis.limiting_factor,
);
std::fs::write(path, content)?;
Ok(())
}
/// Renders the "Feature-Cohort DSA Selection" Markdown section of the report.
///
/// The section contains, in order: the ranking formula and cohort sizes, the seed-feature
/// check, a table with the best row per cohort, the selected configuration (when the
/// summary has one), the failure analysis (when the summary has one), and the list of
/// saved artifact file names.
///
/// All format arguments are positional and must stay in the same order as the
/// placeholders in their templates.
pub fn cohort_report_section(cohorts: &FeatureCohorts, summary: &CohortDsaSummary) -> String {
let mut out = String::new();
out.push_str("## Feature-Cohort DSA Selection\n\n");
// `{{` / `}}` in the template are escaped literal braces for the grid description.
out.push_str(&format!(
"- Ranking formula: `{}`\n- Missingness penalty: {:.1} when `missing_fraction > {:.2}`\n- Selected cohorts: top_4={}, top_8={}, top_16={}, all_features={}\n- Legacy one-run-tolerance cohort gate used inside the bounded sweep: {}\n- Full bounded cohort grid: `W in {{5,10,15}}`, `K in {{2,3,4}}`, `tau in {{2.0,2.5,3.0}}`, `m in {{1,2,3,5}}` where valid\n\n",
summary.ranking_formula,
cohorts.missingness_penalty_value,
cohorts.missingness_penalty_threshold,
cohorts.top_4.len(),
cohorts.top_8.len(),
cohorts.top_16.len(),
cohorts.all_features.len(),
summary.primary_success_condition,
));
out.push_str("### Seed-feature check\n\n");
for seed in &cohorts.seed_feature_report {
if seed.found_in_ranking {
// A missing rank or score is printed as 0 / 0.0.
out.push_str(&format!(
"- {}: rank {}, score {:.4}, top_4={}, top_8={}, top_16={}\n",
seed.feature_name,
seed.rank.unwrap_or(0),
seed.candidate_score.unwrap_or(0.0),
seed.in_top_4,
seed.in_top_8,
seed.in_top_16,
));
} else {
out.push_str(&format!(
"- {}: not present in the analyzable-feature ranking\n",
seed.feature_name,
));
}
}
out.push('\n');
out.push_str("### Best row per cohort\n\n");
// Markdown table: header row, alignment row, then one row per cohort.
out.push_str("| Cohort | W | K | tau | m | Recall | Mean lead | Nuisance | Episodes | Compression | Precursor quality | Legacy gate |\n");
out.push_str("|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n");
for best in &summary.best_by_cohort {
let row = &best.best_row;
out.push_str(&format!(
"| {} | {} | {} | {:.1} | {} | {}/{} | {} | {:.4} | {} | {} | {} | {} |\n",
row.cohort_name,
row.window,
row.persistence_runs,
row.alert_tau,
row.corroborating_m,
row.failure_recall,
row.failure_runs,
format_option_f64(row.mean_lead_time_runs),
row.pass_run_nuisance_proxy,
row.dsa_episode_count,
format_option_f64(row.compression_ratio),
format_option_f64(row.precursor_quality),
if row.primary_success { "yes" } else { "no" },
));
}
out.push('\n');
if let Some(selected) = &summary.selected_configuration {
out.push_str("### Best cohort/grid result\n\n");
out.push_str(&format!(
"- Selected configuration: {}\n- Recall: {}/{}\n- Mean lead: {}\n- Median lead: {}\n- Nuisance: {:.4} versus EWMA {:.4}\n- Compression ratio: {}\n- Precursor quality: {}\n- Legacy one-run-tolerance cohort gate met: {}\n\n",
row_label(selected),
selected.failure_recall,
selected.failure_runs,
format_option_f64(selected.mean_lead_time_runs),
format_option_f64(selected.median_lead_time_runs),
selected.pass_run_nuisance_proxy,
selected.ewma_nuisance,
format_option_f64(selected.compression_ratio),
format_option_f64(selected.precursor_quality),
selected.primary_success,
));
}
if let Some(failure_analysis) = &summary.failure_analysis {
out.push_str("### Failure analysis\n\n");
out.push_str(&format!(
"- Closest cohort: {}\n- Closest grid point: {}\n- Limiting factor: {}\n- Corroboration effect: {}\n- Ranking quality: {}\n- All-feature vs cohort: {}\n- Best near-success source: {}\n\n",
failure_analysis.closest_cohort,
failure_analysis.closest_grid_point,
failure_analysis.limiting_factor,
failure_analysis.corroboration_effect,
failure_analysis.ranking_quality_note,
failure_analysis.all_feature_dsa_vs_cohort_note,
failure_analysis.best_near_success_source,
));
}
out.push_str("- Saved artifacts: `dsa_feature_ranking.csv`, `dsa_seed_feature_check.json`, `dsa_feature_cohorts.json`, `dsa_grid_results.csv`, `dsa_cohort_results.csv`, `dsa_cohort_summary.json`, `dsa_cohort_precursor_quality.csv`\n");
// The failure-analysis artifact is only listed when a failure analysis exists.
if summary.failure_analysis.is_some() {
out.push_str("- Failure-analysis artifact: `dsa_cohort_failure_analysis.md`\n");
}
out.push('\n');
out
}
/// Renders the "Rating Delta Forecast" Markdown section of the report.
///
/// The section lists the primary success condition and forecast scores, a disclaimer that
/// the figures are a forecast only, the forecast justification, and the supporting
/// metrics. Optional metrics are formatted with `format_option_f64`.
///
/// All format arguments are positional and must stay in the same order as the
/// placeholders in their templates.
pub fn rating_forecast_report_section(forecast: &RatingDeltaForecast) -> String {
let mut out = String::new();
out.push_str("## Rating Delta Forecast\n\n");
out.push_str(&format!(
"- Primary success condition: {}\n- Primary success met: {}\n- Chosen configuration: {}\n- Forecast score if primary success only: {:.1}\n- Forecast score if primary + secondary success: {:.1}\n- Forecast score under current measured result: {:.1}\n\n",
forecast.primary_success_condition,
forecast.primary_success_met,
forecast.chosen_configuration,
forecast.forecast_score_if_primary_success_only,
forecast.forecast_score_if_primary_plus_secondary_success,
forecast.achieved_forecast_score,
));
out.push_str("*Forecast only. This is not an achieved score.*\n\n");
out.push_str(&format!("{}\n\n", forecast.forecast_justification));
// Supporting metrics behind the forecast.
out.push_str(&format!(
"- DSA nuisance: {:.6}\n- EWMA nuisance: {:.6}\n- DSA recall: {}\n- Threshold recall: {}\n- Recall tolerance: {} run(s)\n- DSA mean lead: {}\n- EWMA mean lead: {}\n- Threshold mean lead: {}\n- DSA precursor quality: {}\n- All-feature DSA precursor quality: {}\n- DSA compression ratio: {}\n- All-feature DSA compression ratio: {}\n\n",
forecast.supporting_metrics.dsa_nuisance,
forecast.supporting_metrics.ewma_nuisance,
forecast.supporting_metrics.dsa_recall,
forecast.supporting_metrics.threshold_recall,
forecast.supporting_metrics.recall_tolerance_runs,
format_option_f64(forecast.supporting_metrics.dsa_mean_lead_time_runs),
format_option_f64(forecast.supporting_metrics.ewma_mean_lead_time_runs),
format_option_f64(forecast.supporting_metrics.threshold_mean_lead_time_runs),
format_option_f64(forecast.supporting_metrics.dsa_precursor_quality),
format_option_f64(forecast.supporting_metrics.all_feature_dsa_precursor_quality),
format_option_f64(forecast.supporting_metrics.dsa_compression_ratio),
format_option_f64(forecast.supporting_metrics.all_feature_dsa_compression_ratio),
));
out
}
/// Builds one `CohortGridResult` row for a single (cohort, grid point) evaluation.
///
/// Values are copied from the DSA `evaluation` and from the baseline
/// `metrics`; the only quantities derived here are the feature-level
/// active/alert point totals, the persistence suppression fraction, the
/// nuisance deltas against EWMA/threshold, the per-pass-run burden figures
/// (forced to `0.0` when the dataset has no label equal to `-1`), and the
/// primary success flag with its textual reason.
fn build_grid_row(
    grid_row_id: usize,
    feature_trace_config_id: usize,
    ranking_strategy: &str,
    ranking_formula: &str,
    cohort_name: &str,
    cohort_size: usize,
    config: &DsaConfig,
    corroborating_m: usize,
    dataset: &PreparedDataset,
    evaluation: &DsaEvaluation,
    metrics: &BenchmarkMetrics,
) -> CohortGridResult {
    let summary = &evaluation.summary;
    let comparison = &evaluation.comparison_summary;
    let episodes = &evaluation.episode_summary;
    let baseline = &metrics.summary;

    // Total the per-feature active and alert flags across every trace.
    let mut feature_level_active_points = 0usize;
    let mut feature_level_alert_points = 0usize;
    for trace in evaluation.traces.iter() {
        feature_level_active_points += trace.dsa_active.iter().filter(|&&flag| flag).count();
        feature_level_alert_points += trace.dsa_alert.iter().filter(|&&flag| flag).count();
    }

    let threshold_recall = baseline.failure_runs_with_preceding_threshold_signal;
    let ewma_recall = baseline.failure_runs_with_preceding_ewma_signal;
    let ewma_nuisance = baseline.pass_run_ewma_nuisance_rate;
    let threshold_nuisance = baseline.pass_run_threshold_nuisance_rate;

    // Per-pass-run figures are always computed, but only reported when at
    // least one run carries the label `-1`.
    let has_pass_runs = dataset.labels.iter().any(|label| *label == -1);
    let review_points_rate = review_escalate_points_per_pass_run(dataset, evaluation);
    let numeric_points_rate = numeric_alert_points_per_pass_run(dataset, evaluation);
    let review_episodes_rate = review_escalate_episodes_per_pass_run(dataset, evaluation);
    let numeric_episodes_rate = numeric_alert_episodes_per_pass_run(dataset, evaluation);

    let primary_success = summary.pass_run_nuisance_proxy < ewma_nuisance
        && summary.failure_run_recall + RECALL_TOLERANCE >= threshold_recall;

    let persistence_suppression_fraction = match feature_level_active_points {
        0 => None,
        active => Some(1.0 - feature_level_alert_points as f64 / active as f64),
    };

    CohortGridResult {
        ranking_strategy: ranking_strategy.to_string(),
        ranking_formula: ranking_formula.to_string(),
        grid_row_id,
        feature_trace_config_id,
        cohort_name: cohort_name.to_string(),
        cohort_size,
        window: config.window,
        persistence_runs: config.persistence_runs,
        alert_tau: config.alert_tau,
        corroborating_m,
        primary_run_signal: evaluation.run_signals.primary_run_signal.clone(),
        failure_recall: summary.failure_run_recall,
        failure_runs: summary.failure_runs,
        failure_recall_rate: summary.failure_run_recall_rate,
        threshold_recall,
        ewma_recall,
        failure_recall_delta_vs_threshold: comparison.failure_recall_delta_vs_threshold,
        failure_recall_delta_vs_ewma: comparison.failure_recall_delta_vs_ewma,
        mean_lead_time_runs: summary.mean_lead_time_runs,
        median_lead_time_runs: summary.median_lead_time_runs,
        threshold_mean_lead_time_runs: metrics.lead_time_summary.mean_threshold_lead_runs,
        ewma_mean_lead_time_runs: metrics.lead_time_summary.mean_ewma_lead_runs,
        mean_lead_delta_vs_threshold_runs: summary.mean_lead_delta_vs_threshold_runs,
        mean_lead_delta_vs_ewma_runs: summary.mean_lead_delta_vs_ewma_runs,
        pass_run_nuisance_proxy: summary.pass_run_nuisance_proxy,
        numeric_pass_run_nuisance_proxy: summary.numeric_primary_pass_run_nuisance_proxy,
        ewma_nuisance,
        threshold_nuisance,
        pass_run_nuisance_delta_vs_ewma: summary.pass_run_nuisance_proxy - ewma_nuisance,
        pass_run_nuisance_delta_vs_threshold: summary.pass_run_nuisance_proxy - threshold_nuisance,
        pass_run_nuisance_delta_vs_numeric_dsa: comparison.pass_run_nuisance_delta_vs_numeric_dsa,
        raw_boundary_episode_count: episodes.raw_boundary_episode_count,
        dsa_episode_count: episodes.dsa_episode_count,
        dsa_episodes_preceding_failure: episodes.dsa_episodes_preceding_failure,
        mean_dsa_episode_length_runs: episodes.mean_dsa_episode_length_runs,
        max_dsa_episode_length_runs: episodes.max_dsa_episode_length_runs,
        compression_ratio: episodes.compression_ratio,
        precursor_quality: episodes.precursor_quality,
        non_escalating_dsa_episode_fraction: episodes.non_escalating_dsa_episode_fraction,
        feature_level_active_points,
        feature_level_alert_points,
        persistence_suppression_fraction,
        numeric_failure_recall: summary.numeric_primary_failure_run_recall,
        policy_vs_numeric_recall_delta: comparison.policy_vs_numeric_recall_delta,
        watch_point_count: summary.watch_point_count,
        review_point_count: summary.review_point_count,
        escalate_point_count: summary.escalate_point_count,
        investigation_point_count: summary.alert_point_count,
        numeric_investigation_point_count: summary.numeric_alert_point_count,
        silenced_point_count: summary.silenced_point_count,
        rescued_point_count: summary.rescued_point_count,
        rescued_watch_to_review_points: summary.rescued_watch_to_review_points,
        rescued_review_to_escalate_points: summary.rescued_review_to_escalate_points,
        review_escalate_points_per_pass_run: if has_pass_runs {
            review_points_rate
        } else {
            0.0
        },
        numeric_alert_points_per_pass_run: if has_pass_runs {
            numeric_points_rate
        } else {
            0.0
        },
        review_escalate_episodes_per_pass_run: if has_pass_runs {
            review_episodes_rate
        } else {
            0.0
        },
        numeric_alert_episodes_per_pass_run: if has_pass_runs {
            numeric_episodes_rate
        } else {
            0.0
        },
        primary_success,
        primary_success_reason: primary_success_reason(
            summary.failure_run_recall,
            threshold_recall,
            summary.pass_run_nuisance_proxy,
            ewma_nuisance,
        ),
    }
}
/// Expands the motif-policy contributions of `evaluation` into report rows,
/// stamping each one with the grid coordinates copied from `row`.
///
/// Output order follows `evaluation.motif_policy_contributions`.
fn build_motif_policy_rows(
    row: &CohortGridResult,
    evaluation: &DsaEvaluation,
) -> Vec<CohortMotifPolicyContributionRow> {
    let mut contribution_rows = Vec::new();
    for contribution in evaluation.motif_policy_contributions.iter() {
        contribution_rows.push(CohortMotifPolicyContributionRow {
            // Grid coordinates shared by every contribution of this row.
            grid_row_id: row.grid_row_id,
            cohort_name: row.cohort_name.clone(),
            cohort_size: row.cohort_size,
            window: row.window,
            persistence_runs: row.persistence_runs,
            alert_tau: row.alert_tau,
            corroborating_m: row.corroborating_m,
            // Per-motif counts copied verbatim from the evaluation.
            motif_name: contribution.motif_name.clone(),
            alert_class_default: contribution.alert_class_default,
            watch_points: contribution.watch_points,
            review_points: contribution.review_points,
            escalate_points: contribution.escalate_points,
            silent_suppression_points: contribution.silent_suppression_points,
            pass_review_or_escalate_points: contribution.pass_review_or_escalate_points,
            pre_failure_review_or_escalate_points: contribution
                .pre_failure_review_or_escalate_points,
        });
    }
    contribution_rows
}
/// Groups grid rows by "<cohort_name> [<ranking_strategy>]" and keeps the
/// best row of every group, as chosen by `best_row`.
///
/// Groups are emitted in the lexicographic order of that composite key
/// (the ordering of the underlying `BTreeMap`). The composite key is also
/// what ends up in `CohortBestRow::cohort_name`.
fn build_best_by_cohort(rows: &[CohortGridResult]) -> Vec<CohortBestRow> {
    let mut rows_by_key: BTreeMap<String, Vec<CohortGridResult>> = BTreeMap::new();
    for row in rows {
        let key = format!("{} [{}]", row.cohort_name, row.ranking_strategy);
        rows_by_key
            .entry(key)
            .or_insert_with(Vec::new)
            .push(row.clone());
    }

    let mut best_rows = Vec::new();
    for (cohort_name, cohort_rows) in rows_by_key {
        if let Some(winner) = best_row(&cohort_rows) {
            best_rows.push(CohortBestRow {
                cohort_name,
                best_row: winner,
            });
        }
    }
    best_rows
}
fn best_row(rows: &[CohortGridResult]) -> Option<CohortGridResult> {
let success_rows = rows
.iter()
.filter(|row| row.primary_success)
.cloned()
.collect::<Vec<_>>();
if !success_rows.is_empty() {
return success_rows.into_iter().min_by(compare_successful_rows);
}
choose_closest_to_success(rows)
}
/// Returns the row with the smallest `primary_success_gap`, breaking ties
/// with `compare_successful_rows`. Returns `None` when `rows` is empty.
///
/// Gaps that cannot be ordered (NaN) are treated as equal. The comparison
/// runs over references and only the selected row is cloned, rather than
/// cloning every candidate before comparing.
fn choose_closest_to_success(rows: &[CohortGridResult]) -> Option<CohortGridResult> {
    rows.iter()
        .min_by(|left, right| {
            primary_success_gap(left)
                .partial_cmp(&primary_success_gap(right))
                .unwrap_or(Ordering::Equal)
                .then_with(|| compare_successful_rows(left, right))
        })
        .cloned()
}
/// Total preference order between two rows; `Ordering::Less` means `left`
/// is preferred.
///
/// Keys, in priority order: lower pass-run nuisance, higher failure recall,
/// higher mean lead time, higher precursor quality, higher compression
/// ratio, then cohort name, window, persistence runs and corroboration count
/// ascending as deterministic tie-breakers. Unorderable nuisance values
/// (NaN) compare as equal.
fn compare_successful_rows(left: &CohortGridResult, right: &CohortGridResult) -> Ordering {
    let by_nuisance = left
        .pass_run_nuisance_proxy
        .partial_cmp(&right.pass_run_nuisance_proxy)
        .unwrap_or(Ordering::Equal);

    // "Higher is better" keys: operands are swapped so larger values sort first.
    let by_quality = || {
        right
            .failure_recall
            .cmp(&left.failure_recall)
            .then_with(|| compare_option_f64(right.mean_lead_time_runs, left.mean_lead_time_runs))
            .then_with(|| compare_option_f64(right.precursor_quality, left.precursor_quality))
            .then_with(|| compare_option_f64(right.compression_ratio, left.compression_ratio))
    };

    // Tuples compare lexicographically, matching a chain of `then_with` calls.
    let by_identity = || {
        let left_key = (
            &left.cohort_name,
            &left.window,
            &left.persistence_runs,
            &left.corroborating_m,
        );
        let right_key = (
            &right.cohort_name,
            &right.window,
            &right.persistence_runs,
            &right.corroborating_m,
        );
        left_key.cmp(&right_key)
    };

    by_nuisance.then_with(by_quality).then_with(by_identity)
}
/// Distance of a row from the primary success gate; smaller is closer.
///
/// The value is the sum of two non-negative parts:
/// * the amount by which pass-run nuisance exceeds EWMA nuisance, and
/// * the number of recalled failure runs missing below
///   `threshold_recall - RECALL_TOLERANCE`, divided by `threshold_recall`
///   (clamped to at least 1 to avoid dividing by zero).
fn primary_success_gap(row: &CohortGridResult) -> f64 {
    let missing_runs = row
        .threshold_recall
        .saturating_sub(RECALL_TOLERANCE)
        .saturating_sub(row.failure_recall);
    let recall_denominator = row.threshold_recall.max(1);
    let excess_nuisance = (row.pass_run_nuisance_proxy - row.ewma_nuisance).max(0.0);
    excess_nuisance + missing_runs as f64 / recall_denominator as f64
}
/// Describes whether requiring more than one corroborating feature moved the
/// sweep closer to the primary success gate.
///
/// The closest row with `corroborating_m == 1` is compared against the
/// closest row with `corroborating_m > 1` using `primary_success_gap`;
/// differences of at most `1.0e-9` are reported as a tie. If either group is
/// empty, a "could not be separated" note is returned instead.
fn corroboration_effect(rows: &[CohortGridResult]) -> String {
    /// First row with the smallest primary-success gap among `candidates`.
    fn closest_by_gap<'a>(
        candidates: impl Iterator<Item = &'a CohortGridResult>,
    ) -> Option<&'a CohortGridResult> {
        candidates.min_by(|left, right| {
            primary_success_gap(left)
                .partial_cmp(&primary_success_gap(right))
                .unwrap_or(Ordering::Equal)
        })
    }

    let single = closest_by_gap(rows.iter().filter(|row| row.corroborating_m == 1));
    let corroborated = closest_by_gap(rows.iter().filter(|row| row.corroborating_m > 1));

    let (Some(single), Some(corroborated)) = (single, corroborated) else {
        return "Cross-feature corroboration effect could not be separated from the saved sweep."
            .to_string();
    };

    let single_gap = primary_success_gap(single);
    let corroborated_gap = primary_success_gap(corroborated);
    if corroborated_gap + 1.0e-9 < single_gap {
        return format!(
            "Cross-feature corroboration improved the closest result: {} beat {} with gap {:.4} vs {:.4}.",
            row_label(corroborated),
            row_label(single),
            corroborated_gap,
            single_gap,
        );
    }
    if single_gap + 1.0e-9 < corroborated_gap {
        return format!(
            "Cross-feature corroboration degraded the closest result: {} beat {} with gap {:.4} vs {:.4}.",
            row_label(single),
            row_label(corroborated),
            single_gap,
            corroborated_gap,
        );
    }
    "Cross-feature corroboration produced effectively tied nuisance/recall trade-offs."
        .to_string()
}
/// Names which half of the primary gate (nuisance, recall, both or neither)
/// the given row fails, measured against the supplied EWMA nuisance and
/// threshold recall. Returns a placeholder sentence when no row is supplied.
fn limiting_factor_from_row(
    row: Option<&CohortGridResult>,
    ewma_nuisance: f64,
    threshold_recall: usize,
) -> String {
    let row = match row {
        Some(row) => row,
        None => return "No cohort row was available for limiting-factor analysis.".into(),
    };

    let nuisance_ok = row.pass_run_nuisance_proxy < ewma_nuisance;
    let recall_ok = row.failure_recall + RECALL_TOLERANCE >= threshold_recall;

    let verdict = if nuisance_ok && recall_ok {
        "The legacy one-run-tolerance cohort gate was met on this row."
    } else if nuisance_ok {
        "Recall was the limiting factor."
    } else if recall_ok {
        "Nuisance was the limiting factor."
    } else {
        "Both nuisance and recall remained limiting factors."
    };
    verdict.into()
}
/// Assembles the failure-analysis record for a cohort sweep.
///
/// The analysis is anchored on the row closest to primary success
/// (`choose_closest_to_success`); `None` is returned when `rows` is empty.
/// It also compares the best `"all_features"` row against the best row of
/// any other cohort, assesses ranking quality for the best ranked cohort
/// (falling back to the closest row's cohort), and names the dominant
/// nuisance and precursor motifs of the closest row. `limiting_factor` and
/// `corroboration_effect` are passed through unchanged.
fn build_failure_analysis(
    rows: &[CohortGridResult],
    motif_policy_rows: &[CohortMotifPolicyContributionRow],
    cohorts: &FeatureCohorts,
    ewma_nuisance: f64,
    threshold_recall: usize,
    selected_row: Option<&CohortGridResult>,
    corroboration_effect: &str,
    limiting_factor: &str,
) -> Option<CohortFailureAnalysis> {
    let closest = choose_closest_to_success(rows)?;

    // Split the sweep into all-feature rows and ranked-cohort rows, keeping
    // the input order inside each half.
    let (all_feature_rows, ranked_rows): (Vec<CohortGridResult>, Vec<CohortGridResult>) = rows
        .iter()
        .cloned()
        .partition(|row| row.cohort_name == "all_features");
    let best_all_features = best_row(&all_feature_rows);
    let best_ranked = best_row(&ranked_rows);

    let all_feature_dsa_vs_cohort_note = if let (Some(all_features), Some(ranked)) =
        (&best_all_features, &best_ranked)
    {
        let all_gap = primary_success_gap(all_features);
        let ranked_gap = primary_success_gap(ranked);
        if ranked_gap + 1.0e-9 < all_gap {
            format!(
                "Ranked cohort DSA was better than all-feature DSA: {} beat {}.",
                row_label(ranked),
                row_label(all_features),
            )
        } else if all_gap + 1.0e-9 < ranked_gap {
            format!(
                "All-feature DSA remained better than the ranked cohorts: {} beat {}.",
                row_label(all_features),
                row_label(ranked),
            )
        } else {
            "All-feature DSA and the best ranked cohort were effectively tied.".into()
        }
    } else {
        "Not enough saved cohort rows to compare all-feature DSA against ranked cohorts.".into()
    };

    // Ranking quality is judged on the best ranked cohort when there is one.
    let ranking_reference = match &best_ranked {
        Some(ranked) => ranked.cohort_name.clone(),
        None => closest.cohort_name.clone(),
    };
    let ranking_note = ranking_quality_note(cohorts, &ranking_reference);

    let best_near_success_source = match selected_row {
        Some(selected) => row_label(selected),
        None => row_label(&closest),
    };
    let policy_note = policy_vs_numeric_note(&closest);
    let nuisance_motif_classes = dominant_motif_note(motif_policy_rows, closest.grid_row_id, true);
    let useful_precursor_motif_classes =
        dominant_motif_note(motif_policy_rows, closest.grid_row_id, false);

    Some(CohortFailureAnalysis {
        closest_cohort: closest.cohort_name.clone(),
        closest_grid_point: row_grid_point(&closest),
        closest_policy_setting: row_label(&closest),
        closest_nuisance: closest.pass_run_nuisance_proxy,
        closest_recall: closest.failure_recall,
        ewma_nuisance,
        threshold_recall,
        limiting_factor: limiting_factor.to_string(),
        corroboration_effect: corroboration_effect.to_string(),
        policy_vs_numeric_note: policy_note,
        ranking_quality_note: ranking_note,
        all_feature_dsa_vs_cohort_note,
        best_near_success_source,
        nuisance_motif_classes,
        useful_precursor_motif_classes,
    })
}
/// Summarises how the policy-driven DSA compares with numeric-only DSA on
/// the given row.
///
/// A negative nuisance delta is reported as an improvement, qualified by
/// whether the recall delta is non-negative; a positive delta is reported as
/// a regression; anything else (including a zero delta) is reported as a tie.
fn policy_vs_numeric_note(row: &CohortGridResult) -> String {
    let nuisance_delta = row.pass_run_nuisance_delta_vs_numeric_dsa;

    if nuisance_delta < 0.0 {
        let recall_preserved = row.policy_vs_numeric_recall_delta >= 0;
        if recall_preserved {
            format!(
                "Policy suppression helped relative to numeric-only DSA: nuisance improved from {:.4} to {:.4} without recall loss ({} to {}).",
                row.numeric_pass_run_nuisance_proxy,
                row.pass_run_nuisance_proxy,
                row.numeric_failure_recall,
                row.failure_recall,
            )
        } else {
            format!(
                "Policy suppression reduced nuisance relative to numeric-only DSA ({:.4} to {:.4}) but lost recall ({} to {}).",
                row.numeric_pass_run_nuisance_proxy,
                row.pass_run_nuisance_proxy,
                row.numeric_failure_recall,
                row.failure_recall,
            )
        }
    } else if nuisance_delta > 0.0 {
        format!(
            "Policy suppression hurt nuisance relative to numeric-only DSA: {:.4} vs {:.4}.",
            row.pass_run_nuisance_proxy, row.numeric_pass_run_nuisance_proxy,
        )
    } else {
        "Policy suppression and numeric-only DSA were effectively tied on pass-run nuisance.".into()
    }
}
fn dominant_motif_note(
motif_policy_rows: &[CohortMotifPolicyContributionRow],
grid_row_id: usize,
nuisance: bool,
) -> String {
let mut rows = motif_policy_rows
.iter()
.filter(|row| row.grid_row_id == grid_row_id)
.collect::<Vec<_>>();
if rows.is_empty() {
return "No motif-policy contribution rows were available.".into();
}
rows.sort_by(|left, right| {
let left_score = if nuisance {
left.pass_review_or_escalate_points
} else {
left.pre_failure_review_or_escalate_points
};
let right_score = if nuisance {
right.pass_review_or_escalate_points
} else {
right.pre_failure_review_or_escalate_points
};
right_score
.cmp(&left_score)
.then_with(|| left.motif_name.cmp(&right.motif_name))
});
let top = rows[0];
let score = if nuisance {
top.pass_review_or_escalate_points
} else {
top.pre_failure_review_or_escalate_points
};
if nuisance {
format!(
"{} ({:?}) contributed the most pass-run Review/Escalate points: {}.",
top.motif_name, top.alert_class_default, score
)
} else {
format!(
"{} ({:?}) contributed the most pre-failure Review/Escalate points: {}.",
top.motif_name, top.alert_class_default, score
)
}
}
fn ranking_quality_note(cohorts: &FeatureCohorts, cohort_name: &str) -> String {
let selected = cohort_members(cohorts, cohort_name);
if selected.is_empty() {
return "Ranking quality could not be assessed because the selected cohort was empty."
.to_string();
}
let selected_violation_ratio = average_ratio(
selected,
|member| member.dsfb_violation_points,
|member| member.dsfb_boundary_points,
);
let selected_threshold_ratio = average_ratio(
selected,
|member| member.threshold_alarm_points,
|member| member.dsfb_boundary_points,
);
let all_violation_ratio = average_ratio(
&cohorts.all_features,
|member| member.dsfb_violation_points,
|member| member.dsfb_boundary_points,
);
let all_threshold_ratio = average_ratio(
&cohorts.all_features,
|member| member.threshold_alarm_points,
|member| member.dsfb_boundary_points,
);
if selected_violation_ratio > all_violation_ratio * 1.25
|| selected_threshold_ratio > all_threshold_ratio * 1.25
{
format!(
"Ranking appears to have over-selected noisy features: cohort violation/boundary ratio {:.4} vs all-feature {:.4}, threshold/boundary ratio {:.4} vs all-feature {:.4}.",
selected_violation_ratio,
all_violation_ratio,
selected_threshold_ratio,
all_threshold_ratio,
)
} else {
format!(
"Ranking did not obviously over-select noisy features: cohort violation/boundary ratio {:.4} vs all-feature {:.4}, threshold/boundary ratio {:.4} vs all-feature {:.4}.",
selected_violation_ratio,
all_violation_ratio,
selected_threshold_ratio,
all_threshold_ratio,
)
}
}
/// Recomputes the DSA evaluation behind a saved grid row.
///
/// A base evaluation is run with the row's window, persistence and tau and
/// a corroboration minimum of 1; it is then projected onto the feature
/// indices of the row's cohort using the row's own corroboration count.
/// Errors from either step are propagated to the caller.
fn rebuild_selected_evaluation(
    dataset: &PreparedDataset,
    nominal: &NominalModel,
    residuals: &ResidualSet,
    signs: &SignSet,
    baselines: &BaselineSet,
    grammar: &GrammarSet,
    cohorts: &FeatureCohorts,
    pre_failure_lookback_runs: usize,
    row: &CohortGridResult,
) -> Result<DsaEvaluation> {
    // Corroboration is applied by the projection below, so the base run
    // uses the minimum of one feature.
    let single_feature_config = DsaConfig {
        window: row.window,
        persistence_runs: row.persistence_runs,
        alert_tau: row.alert_tau,
        corroborating_feature_count_min: 1,
    };
    let unprojected = evaluate_dsa(
        dataset,
        nominal,
        residuals,
        signs,
        baselines,
        grammar,
        &single_feature_config,
        pre_failure_lookback_runs,
    )?;

    let mut feature_indices = Vec::new();
    for member in cohort_members(cohorts, &row.cohort_name) {
        feature_indices.push(member.feature_index);
    }

    project_dsa_to_cohort(
        dataset,
        nominal,
        residuals,
        baselines,
        grammar,
        &unprojected,
        &feature_indices,
        row.corroborating_m,
        pre_failure_lookback_runs,
        &row.cohort_name,
    )
}
/// Builds a stand-in `CohortGridResult` directly from a DSA evaluation, with
/// no cohort sweep behind it.
///
/// The row is labelled with the `"selected"` strategy and the
/// `"default_all_features"` cohort, and uses id 0 for both the grid row and
/// the feature-trace config. The four per-pass-run burden figures are set to
/// `0.0` because this function has no dataset to derive them from.
fn fallback_row_from_dsa(dsa: &DsaEvaluation, metrics: &BenchmarkMetrics) -> CohortGridResult {
    let summary = &dsa.summary;
    let comparison = &dsa.comparison_summary;
    let episodes = &dsa.episode_summary;
    let baseline = &metrics.summary;
    let lead_times = &metrics.lead_time_summary;

    // Total the per-feature active and alert flags across every trace.
    let mut feature_level_active_points = 0;
    let mut feature_level_alert_points = 0;
    for trace in dsa.traces.iter() {
        feature_level_active_points += trace.dsa_active.iter().filter(|&&flag| flag).count();
        feature_level_alert_points += trace.dsa_alert.iter().filter(|&&flag| flag).count();
    }

    let threshold_recall = baseline.failure_runs_with_preceding_threshold_signal;
    let ewma_nuisance = baseline.pass_run_ewma_nuisance_rate;
    let primary_success = summary.pass_run_nuisance_proxy < ewma_nuisance
        && summary.failure_run_recall + RECALL_TOLERANCE >= threshold_recall;

    CohortGridResult {
        ranking_strategy: "selected".into(),
        ranking_formula: "selected evaluation".into(),
        grid_row_id: 0,
        feature_trace_config_id: 0,
        cohort_name: "default_all_features".into(),
        cohort_size: summary.analyzable_feature_count,
        window: summary.config.window,
        persistence_runs: summary.config.persistence_runs,
        alert_tau: summary.config.alert_tau,
        corroborating_m: summary.config.corroborating_feature_count_min,
        primary_run_signal: dsa.run_signals.primary_run_signal.clone(),
        failure_recall: summary.failure_run_recall,
        failure_runs: summary.failure_runs,
        failure_recall_rate: summary.failure_run_recall_rate,
        threshold_recall,
        ewma_recall: baseline.failure_runs_with_preceding_ewma_signal,
        failure_recall_delta_vs_threshold: comparison.failure_recall_delta_vs_threshold,
        failure_recall_delta_vs_ewma: comparison.failure_recall_delta_vs_ewma,
        mean_lead_time_runs: summary.mean_lead_time_runs,
        median_lead_time_runs: summary.median_lead_time_runs,
        threshold_mean_lead_time_runs: lead_times.mean_threshold_lead_runs,
        ewma_mean_lead_time_runs: lead_times.mean_ewma_lead_runs,
        mean_lead_delta_vs_threshold_runs: summary.mean_lead_delta_vs_threshold_runs,
        mean_lead_delta_vs_ewma_runs: summary.mean_lead_delta_vs_ewma_runs,
        pass_run_nuisance_proxy: summary.pass_run_nuisance_proxy,
        numeric_pass_run_nuisance_proxy: summary.numeric_primary_pass_run_nuisance_proxy,
        ewma_nuisance,
        threshold_nuisance: baseline.pass_run_threshold_nuisance_rate,
        pass_run_nuisance_delta_vs_ewma: comparison.pass_run_nuisance_delta_vs_ewma,
        pass_run_nuisance_delta_vs_threshold: comparison.pass_run_nuisance_delta_vs_threshold,
        pass_run_nuisance_delta_vs_numeric_dsa: comparison.pass_run_nuisance_delta_vs_numeric_dsa,
        raw_boundary_episode_count: episodes.raw_boundary_episode_count,
        dsa_episode_count: episodes.dsa_episode_count,
        dsa_episodes_preceding_failure: episodes.dsa_episodes_preceding_failure,
        mean_dsa_episode_length_runs: episodes.mean_dsa_episode_length_runs,
        max_dsa_episode_length_runs: episodes.max_dsa_episode_length_runs,
        compression_ratio: episodes.compression_ratio,
        precursor_quality: episodes.precursor_quality,
        non_escalating_dsa_episode_fraction: episodes.non_escalating_dsa_episode_fraction,
        feature_level_active_points,
        feature_level_alert_points,
        persistence_suppression_fraction: overall_persistence_suppression_fraction(dsa),
        numeric_failure_recall: summary.numeric_primary_failure_run_recall,
        policy_vs_numeric_recall_delta: comparison.policy_vs_numeric_recall_delta,
        watch_point_count: summary.watch_point_count,
        review_point_count: summary.review_point_count,
        escalate_point_count: summary.escalate_point_count,
        investigation_point_count: summary.alert_point_count,
        numeric_investigation_point_count: summary.numeric_alert_point_count,
        silenced_point_count: summary.silenced_point_count,
        rescued_point_count: summary.rescued_point_count,
        rescued_watch_to_review_points: summary.rescued_watch_to_review_points,
        rescued_review_to_escalate_points: summary.rescued_review_to_escalate_points,
        // No dataset is available here, so per-pass-run burden is left at zero.
        review_escalate_points_per_pass_run: 0.0,
        numeric_alert_points_per_pass_run: 0.0,
        review_escalate_episodes_per_pass_run: 0.0,
        numeric_alert_episodes_per_pass_run: 0.0,
        primary_success,
        primary_success_reason: primary_success_reason(
            summary.failure_run_recall,
            threshold_recall,
            summary.pass_run_nuisance_proxy,
            ewma_nuisance,
        ),
    }
}
/// Returns the best row of the first `best_by_cohort` entry whose name
/// starts with `"all_features"`, or `None` when there is no such entry.
fn best_all_features_row(summary: &CohortDsaSummary) -> Option<&CohortGridResult> {
    for best in summary.best_by_cohort.iter() {
        if best.cohort_name.starts_with("all_features") {
            return Some(&best.best_row);
        }
    }
    None
}
/// Explains, in one sentence, what kept the chosen configuration from a
/// better rating forecast.
///
/// Causes are checked in this order and the first match is returned:
/// 1. **Cohort selection**: the best all-feature row is strictly closer to
///    the primary gate than the best ranked-cohort row.
/// 2. **Corroboration threshold**: within the chosen cohort some row meets
///    the recall criterion and some row meets the nuisance criterion, but no
///    row meets both.
/// 3. **Persistence gate**: more than 25% of the chosen row's feature-level
///    active points were suppressed and its recall misses the gate.
/// 4. Otherwise a generic "DSA score composition" sentence quoting the
///    chosen row's nuisance and recall.
///
/// Steps 1 and 2 are skipped when `cohort_summary` is `None`.
fn determine_rating_limiting_factor(
    cohort_summary: Option<&CohortDsaSummary>,
    chosen: &CohortGridResult,
    ewma_nuisance: f64,
    threshold_recall: usize,
) -> String {
    if let Some(summary) = cohort_summary {
        let best_all_features = best_all_features_row(summary);
        let best_ranked = summary
            .best_by_cohort
            .iter()
            // `build_best_by_cohort` stores "<cohort> [<strategy>]" in
            // `CohortBestRow::cohort_name`, so comparing that key to the bare
            // string "all_features" never excludes anything. Filter on the
            // row's own cohort name so all-feature rows are really left out
            // of the ranked side of the comparison.
            .filter(|best| best.best_row.cohort_name != "all_features")
            .map(|best| &best.best_row)
            .min_by(|left, right| {
                primary_success_gap(left)
                    .partial_cmp(&primary_success_gap(right))
                    .unwrap_or(Ordering::Equal)
            });
        if let (Some(best_all_features), Some(best_ranked)) = (best_all_features, best_ranked) {
            if primary_success_gap(best_all_features) + 1.0e-9 < primary_success_gap(best_ranked) {
                return format!(
                    "cohort selection: {} stayed closer to the nuisance/recall target than {}",
                    row_label(best_all_features),
                    row_label(best_ranked),
                );
            }
        }

        // Check whether recall and nuisance are each attainable within the
        // chosen cohort, just never on the same row.
        let same_cohort_rows = summary
            .cohort_results
            .iter()
            .filter(|row| row.cohort_name == chosen.cohort_name)
            .collect::<Vec<_>>();
        let any_recall_ok = same_cohort_rows
            .iter()
            .any(|row| row.failure_recall + RECALL_TOLERANCE >= threshold_recall);
        let any_nuisance_ok = same_cohort_rows
            .iter()
            .any(|row| row.pass_run_nuisance_proxy < ewma_nuisance);
        let any_joint_success = same_cohort_rows.iter().any(|row| row.primary_success);
        if any_recall_ok && any_nuisance_ok && !any_joint_success {
            return format!(
                "corroboration threshold: cohort {} required different m values to satisfy recall and nuisance separately, but no single corroboration count satisfied both",
                chosen.cohort_name,
            );
        }
    }

    if let Some(persistence_suppression_fraction) = chosen.persistence_suppression_fraction {
        if persistence_suppression_fraction > 0.25
            && chosen.failure_recall + RECALL_TOLERANCE < threshold_recall
        {
            return format!(
                "persistence gate: {:.1}% of feature-level active points were suppressed before alert emission in {}",
                persistence_suppression_fraction * 100.0,
                row_label(chosen),
            );
        }
    }

    format!(
        "DSA score composition: even the closest configuration ({}) left nuisance {:.4} vs EWMA {:.4} and recall {} vs threshold {} - {}",
        row_label(chosen),
        chosen.pass_run_nuisance_proxy,
        ewma_nuisance,
        chosen.failure_recall,
        threshold_recall,
        RECALL_TOLERANCE,
    )
}
/// Produces the five per-category rating forecasts (empirical rigor,
/// operator usefulness, SBIR readiness, licensing readiness and paper
/// readiness) for one of three outcomes: primary and secondary targets met,
/// primary only, or primary not met.
///
/// `secondary_targets_met` is ignored when `primary_success_met` is false.
fn build_category_forecasts(
    primary_success_met: bool,
    secondary_targets_met: bool,
) -> Vec<CategoryForecast> {
    let entry = |category: &'static str,
                 current: &'static str,
                 forecast: &'static str,
                 justification: &'static str| CategoryForecast {
        category: category.into(),
        current: current.into(),
        forecast: forecast.into(),
        justification: justification.into(),
    };

    match (primary_success_met, secondary_targets_met) {
        (true, true) => vec![
            entry(
                "empirical_rigor",
                "strong",
                "strong",
                "Measured DSA nuisance reduction with recall preservation and lead-time parity strengthens the empirical package.",
            ),
            entry(
                "operator_usefulness",
                "moderate",
                "strong",
                "Operator-facing nuisance fell below EWMA while recall stayed near threshold level.",
            ),
            entry(
                "sbir_readiness",
                "moderate",
                "strong",
                "A concrete DSA win over scalar monitoring baselines improves commercialization credibility.",
            ),
            entry(
                "licensing_readiness",
                "moderate",
                "moderate-strong",
                "Measured operator value supports licensing discussions, while evidence remains bounded to the current benchmark.",
            ),
            entry(
                "paper_readiness",
                "moderate",
                "strong",
                "Feature-cohort DSA would add a concrete positive empirical result to the paper narrative.",
            ),
        ],
        (true, false) => vec![
            entry(
                "empirical_rigor",
                "strong",
                "strong",
                "Primary success is still a hard empirical result even without full lead-time improvement.",
            ),
            entry(
                "operator_usefulness",
                "moderate",
                "moderate-strong",
                "Lower nuisance with preserved recall is a partial operator-facing improvement.",
            ),
            entry(
                "sbir_readiness",
                "moderate",
                "moderate-strong",
                "Primary success advances readiness even if secondary improvements are incomplete.",
            ),
            entry(
                "licensing_readiness",
                "moderate",
                "moderate",
                "Without stronger secondary metrics the licensing case improves only modestly.",
            ),
            entry(
                "paper_readiness",
                "moderate",
                "moderate-strong",
                "A bounded success claim remains paper-relevant even without stronger lead-time gains.",
            ),
        ],
        (false, _) => vec![
            entry(
                "empirical_rigor",
                "strong",
                "strong",
                "The package remains rigorous even when cohort DSA does not clear the forecast target.",
            ),
            entry(
                "operator_usefulness",
                "moderate",
                "moderate",
                "No measured cohort configuration achieved the target nuisance/recall trade-off.",
            ),
            entry(
                "sbir_readiness",
                "moderate",
                "moderate",
                "Without a concrete DSA win, readiness does not materially change.",
            ),
            entry(
                "licensing_readiness",
                "moderate",
                "moderate",
                "No measured licensing-relevant delta was demonstrated.",
            ),
            entry(
                "paper_readiness",
                "moderate",
                "moderate",
                "The negative result remains publishable, but it does not support a stronger forecast.",
            ),
        ],
    }
}
/// Converts a ranking row into a `CohortMember`, attaching a sentence that
/// explains the inclusion in `cohort_name` through the row's rank, score and
/// score components.
fn cohort_member(row: &FeatureRankingRow, cohort_name: &str) -> CohortMember {
    let reason_for_inclusion = format!(
        "Included in {} at rank {} because score {:.4} = z_boundary({:+.4}) - z_violation({:+.4}) + z_ewma({:+.4}) - penalty({:.1}).",
        cohort_name,
        row.rank,
        row.candidate_score,
        row.z_boundary,
        row.z_violation,
        row.z_ewma,
        row.missingness_penalty,
    );

    CohortMember {
        feature_index: row.feature_index,
        feature_name: row.feature_name.clone(),
        ranking_score: row.candidate_score,
        dsfb_boundary_points: row.dsfb_raw_boundary_points,
        dsfb_violation_points: row.dsfb_raw_violation_points,
        ewma_alarm_points: row.ewma_alarm_points,
        threshold_alarm_points: row.threshold_alarm_points,
        missing_fraction: row.missing_fraction,
        reason_for_inclusion,
    }
}
/// Explains whether a ranking row falls inside a cohort with the given rank
/// `cutoff`. Rows with `rank <= cutoff` are reported as included; all others
/// are reported as excluded together with their score breakdown.
fn seed_membership_note(row: &FeatureRankingRow, cutoff: usize, cohort_name: &str) -> String {
    let outside_cutoff = row.rank > cutoff;
    if outside_cutoff {
        return format!(
            "Excluded from {} because rank {} is outside the cutoff. Score {:.4} = z_boundary({:+.4}) - z_violation({:+.4}) + z_ewma({:+.4}) - penalty({:.1}).",
            cohort_name,
            row.rank,
            row.candidate_score,
            row.z_boundary,
            row.z_violation,
            row.z_ewma,
            row.missingness_penalty,
        );
    }
    format!(
        "Included in {} at rank {} with score {:.4}.",
        cohort_name, row.rank, row.candidate_score
    )
}
/// Looks up the member list of a named cohort.
///
/// Recognised names are `"top_4"`, `"top_8"`, `"top_16"` and
/// `"all_features"`; any other name yields an empty slice.
fn cohort_members<'a>(cohorts: &'a FeatureCohorts, cohort_name: &str) -> &'a [CohortMember] {
    if cohort_name == "top_4" {
        return cohorts.top_4.as_slice();
    }
    if cohort_name == "top_8" {
        return cohorts.top_8.as_slice();
    }
    if cohort_name == "top_16" {
        return cohorts.top_16.as_slice();
    }
    if cohort_name == "all_features" {
        return cohorts.all_features.as_slice();
    }
    &[]
}
/// Mean of `numerator(item) / denominator(item)` over `items`.
///
/// A zero denominator is replaced by 1, so such an item contributes its
/// plain numerator. An empty slice yields `0.0`.
fn average_ratio<T, FNum, FDen>(items: &[T], numerator: FNum, denominator: FDen) -> f64
where
    FNum: Fn(&T) -> usize,
    FDen: Fn(&T) -> usize,
{
    if items.is_empty() {
        return 0.0;
    }

    let mut ratio_total = 0.0_f64;
    for item in items {
        let top = numerator(item) as f64;
        let bottom = denominator(item).max(1) as f64;
        ratio_total += top / bottom;
    }
    ratio_total / items.len() as f64
}
/// Explains the outcome of the primary success gate in words.
///
/// The gate passes when `nuisance < ewma_nuisance` and
/// `failure_recall + RECALL_TOLERANCE >= threshold_recall`. On success a
/// single "Success: ..." sentence is returned; otherwise the failed
/// criteria are listed, nuisance first, joined by `"; "`.
fn primary_success_reason(
    failure_recall: usize,
    threshold_recall: usize,
    nuisance: f64,
    ewma_nuisance: f64,
) -> String {
    let nuisance_ok = nuisance < ewma_nuisance;
    let recall_ok = failure_recall + RECALL_TOLERANCE >= threshold_recall;

    if nuisance_ok && recall_ok {
        return format!(
            "Success: nuisance {:.4} < EWMA {:.4} and recall {} >= threshold {} - {}.",
            nuisance, ewma_nuisance, failure_recall, threshold_recall, RECALL_TOLERANCE
        );
    }

    let nuisance_failure = (!nuisance_ok).then(|| {
        format!(
            "nuisance {:.4} >= EWMA {:.4}",
            nuisance, ewma_nuisance
        )
    });
    let recall_failure = (!recall_ok).then(|| {
        format!(
            "recall {} < threshold {} - {}",
            failure_recall, threshold_recall, RECALL_TOLERANCE
        )
    });

    nuisance_failure
        .into_iter()
        .chain(recall_failure)
        .collect::<Vec<_>>()
        .join("; ")
}
/// Formats the grid coordinates of a row as `W=<window>, K=<persistence>,
/// tau=<alert tau, one decimal>, m=<corroboration count>`.
fn row_grid_point(row: &CohortGridResult) -> String {
    let (window, persistence, tau, corroboration) = (
        &row.window,
        &row.persistence_runs,
        &row.alert_tau,
        &row.corroborating_m,
    );
    format!(
        "W={}, K={}, tau={:.1}, m={}",
        window, persistence, tau, corroboration
    )
}
/// Human-readable label of a row: `<cohort> [<ranking strategy>] (<grid point>)`,
/// where the grid point comes from `row_grid_point`.
fn row_label(row: &CohortGridResult) -> String {
    let grid_point = row_grid_point(row);
    format!(
        "{} [{}] ({})",
        row.cohort_name, row.ranking_strategy, grid_point
    )
}
/// Returns the five optimisation priorities, already numbered, in the order
/// they are meant to be listed.
fn optimization_priority_order() -> Vec<String> {
    const PRIORITIES: [&str; 5] = [
        "1. Maximize delta_nuisance_vs_ewma",
        "2. Preserve or improve recall toward 103/104 and ideally 104/104",
        "3. Maximize precursor quality",
        "4. Preserve or improve mean lead time vs EWMA/threshold",
        "5. Maintain or improve compression ratio without sacrificing recall badly",
    ];
    PRIORITIES.iter().map(|line| line.to_string()).collect()
}
/// Text of the predeclared primary optimisation target, with
/// `PRIMARY_DELTA_TARGET` rendered to two decimals.
fn predeclared_primary_target() -> String {
    let minimum_delta = PRIMARY_DELTA_TARGET;
    format!(
        "delta_nuisance_vs_ewma >= {:.2} AND DSA recall >= 103/104, where delta_nuisance_vs_ewma = (EWMA_nuisance - DSA_nuisance) / EWMA_nuisance",
        minimum_delta
    )
}
/// Text of the predeclared secondary optimisation target, with
/// `SECONDARY_DELTA_TARGET` rendered to two decimals.
fn predeclared_secondary_target() -> String {
    let minimum_delta = SECONDARY_DELTA_TARGET;
    format!(
        "delta_nuisance_vs_current_dsa >= {:.2} AND DSA recall >= 100/104, where delta_nuisance_vs_current_dsa = (current_policy_dsa_nuisance - optimized_dsa_nuisance) / current_policy_dsa_nuisance",
        minimum_delta
    )
}
/// Short wording of the primary success gate, quoting `RECALL_TOLERANCE`
/// as the number of tolerated runs.
fn primary_success_condition() -> String {
    let tolerated_runs = RECALL_TOLERANCE;
    format!(
        "pass-run nuisance < EWMA nuisance AND failure recall >= threshold recall - {} run(s)",
        tolerated_runs
    )
}
/// DSA-qualified wording of the primary success condition, parameterised by
/// `RECALL_TOLERANCE`.
fn rating_primary_success_condition() -> String {
    let tolerance = RECALL_TOLERANCE;
    format!(
        "DSA pass-run nuisance < EWMA pass-run nuisance AND DSA failure recall >= threshold failure recall - {tolerance} run(s)"
    )
}
/// Computes `1 - alert_points / active_points` over every trace in `dsa`,
/// where the point counts are the numbers of `true` entries in each trace's
/// `dsa_alert` and `dsa_active` flags respectively.
///
/// Returns `None` when no active point exists, since the ratio is undefined.
fn overall_persistence_suppression_fraction(dsa: &DsaEvaluation) -> Option<f64> {
    // Accumulate both tallies in a single walk over the traces.
    let (active_total, alert_total) =
        dsa.traces
            .iter()
            .fold((0usize, 0usize), |(active, alert), trace| {
                (
                    active + trace.dsa_active.iter().filter(|flag| **flag).count(),
                    alert + trace.dsa_alert.iter().filter(|flag| **flag).count(),
                )
            });
    match active_total {
        0 => None,
        total => Some(1.0 - alert_total as f64 / total as f64),
    }
}
/// Returns `Some(left > right)` when both operands are present and `None`
/// when either one is missing.
fn compare_option_gt(left: Option<f64>, right: Option<f64>) -> Option<bool> {
    match (left, right) {
        (Some(lhs), Some(rhs)) => Some(lhs > rhs),
        _ => None,
    }
}
/// Formats an optional value for a CSV cell: six decimal places when present,
/// an empty string when absent.
fn format_option_csv(value: Option<f64>) -> String {
    match value {
        Some(number) => format!("{number:.6}"),
        None => String::new(),
    }
}
/// Formats an optional value for report text: four decimal places when
/// present, the literal `n/a` when absent.
fn format_option_f64(value: Option<f64>) -> String {
    match value {
        Some(number) => format!("{number:.4}"),
        None => String::from("n/a"),
    }
}
/// Returns `true` only when both operands are present and `left >= right`;
/// a missing operand on either side yields `false`.
fn paired_ge(left: Option<f64>, right: Option<f64>) -> bool {
    match left.zip(right) {
        Some((lhs, rhs)) => lhs >= rhs,
        None => false,
    }
}
/// Orders two optional floats so the result is safe to use as a sort or
/// max/min comparator.
///
/// Ordering, from smallest to largest:
/// 1. `None` (missing value),
/// 2. `Some(NaN)`,
/// 3. `Some(x)` for real `x`, ordered numerically (`-0.0` and `0.0` compare equal).
///
/// `partial_cmp` returns `None` only when at least one operand is NaN. Mapping
/// that case to `Ordering::Equal` would make NaN "equal" to every number and
/// break transitivity (1 < 2, yet 1 == NaN == 2), which comparison-based
/// sorts are not required to tolerate. NaN is therefore ranked below every
/// real number instead; results for non-NaN inputs are unchanged.
fn compare_option_f64(left: Option<f64>, right: Option<f64>) -> Ordering {
    match (left, right) {
        (Some(left), Some(right)) => left
            .partial_cmp(&right)
            // Only reached when a NaN is involved: the NaN side sorts lower,
            // and two NaNs tie.
            .unwrap_or_else(|| right.is_nan().cmp(&left.is_nan())),
        (Some(_), None) => Ordering::Greater,
        (None, Some(_)) => Ordering::Less,
        (None, None) => Ordering::Equal,
    }
}
/// Returns `(mean, std)` of `values`, using the population standard deviation
/// (divides by `n`).
///
/// The second component is meant to be a safe divisor: it falls back to `1.0`
/// whenever the computed deviation is not greater than `f64::EPSILON`. An
/// empty slice yields `(0.0, 1.0)`.
fn mean_std(values: &[f64]) -> (f64, f64) {
    if values.is_empty() {
        return (0.0, 1.0);
    }
    let count = values.len() as f64;
    let center = values.iter().sum::<f64>() / count;
    let squared_deviations = values.iter().map(|sample| (sample - center).powi(2));
    let spread = (squared_deviations.sum::<f64>() / count).sqrt();
    let scale = if spread > f64::EPSILON { spread } else { 1.0 };
    (center, scale)
}
/// Standardises `value` against `mean` and `std`: `(value - mean) / std`.
///
/// No guard is applied to `std`; callers are expected to pass a non-zero value.
fn z_score(value: f64, mean: f64, std: f64) -> f64 {
    let deviation = value - mean;
    deviation / std
}
#[cfg(test)]
mod tests {
use super::*;
use crate::metrics::{
BenchmarkMetrics, BenchmarkSummary, BoundaryEpisodeSummary, DensitySummary, LeadTimeSummary,
};
use crate::preprocessing::DatasetSummary;
/// Builds a fixed four-row feature ranking fixture for the cohort tests.
///
/// Every row uses the `compression_biased` strategy with `RANKING_FORMULA`,
/// leaves the recall/burden/semantic contribution fields (and their z-scores)
/// as `None`, and has a zero missingness penalty. The rows are, by `rank`:
/// S059 (1), S044 (2), S061 (3) and S222 (4).
///
/// NOTE(review): S222 carries a higher `candidate_score` (2.5) than S061 (2.3)
/// yet is ranked below it — presumably deliberate for the fixture; confirm
/// before relying on rank/score consistency here.
fn sample_ranking() -> Vec<FeatureRankingRow> {
vec![
// Rank 1: S059, by far the strongest boundary/EWMA z-scores.
FeatureRankingRow {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
feature_index: 58,
feature_name: "S059".into(),
dsfb_raw_boundary_points: 682,
dsfb_persistent_boundary_points: 650,
dsfb_raw_violation_points: 31,
dsfb_persistent_violation_points: 4,
ewma_alarm_points: 624,
threshold_alarm_points: 31,
pre_failure_run_hits: 20,
motif_precision_proxy: Some(0.6),
recall_rescue_contribution: None,
operator_burden_contribution: None,
semantic_persistence_contribution: None,
grouped_semantic_support: None,
violation_overdominance_penalty: None,
missing_fraction: 0.0025,
z_pre_failure_run_hits: None,
z_motif_precision_proxy: None,
z_recall_rescue_contribution: None,
z_operator_burden_contribution: None,
z_semantic_persistence_contribution: None,
z_grouped_semantic_support: None,
z_violation_overdominance_penalty: None,
z_boundary: 5.0,
z_violation: -0.1,
z_ewma: 3.0,
missingness_penalty: 0.0,
candidate_score: 8.1,
score_breakdown: "".into(),
rank: 1,
},
// Rank 2: S044.
FeatureRankingRow {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
feature_index: 43,
feature_name: "S044".into(),
dsfb_raw_boundary_points: 400,
dsfb_persistent_boundary_points: 380,
dsfb_raw_violation_points: 18,
dsfb_persistent_violation_points: 2,
ewma_alarm_points: 210,
threshold_alarm_points: 18,
pre_failure_run_hits: 14,
motif_precision_proxy: Some(0.5),
recall_rescue_contribution: None,
operator_burden_contribution: None,
semantic_persistence_contribution: None,
grouped_semantic_support: None,
violation_overdominance_penalty: None,
missing_fraction: 0.01,
z_pre_failure_run_hits: None,
z_motif_precision_proxy: None,
z_recall_rescue_contribution: None,
z_operator_burden_contribution: None,
z_semantic_persistence_contribution: None,
z_grouped_semantic_support: None,
z_violation_overdominance_penalty: None,
z_boundary: 1.2,
z_violation: -0.5,
z_ewma: 0.9,
missingness_penalty: 0.0,
candidate_score: 2.6,
score_breakdown: "".into(),
rank: 2,
},
// Rank 3: S061.
FeatureRankingRow {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
feature_index: 60,
feature_name: "S061".into(),
dsfb_raw_boundary_points: 340,
dsfb_persistent_boundary_points: 320,
dsfb_raw_violation_points: 18,
dsfb_persistent_violation_points: 1,
ewma_alarm_points: 190,
threshold_alarm_points: 18,
pre_failure_run_hits: 12,
motif_precision_proxy: Some(0.45),
recall_rescue_contribution: None,
operator_burden_contribution: None,
semantic_persistence_contribution: None,
grouped_semantic_support: None,
violation_overdominance_penalty: None,
missing_fraction: 0.01,
z_pre_failure_run_hits: None,
z_motif_precision_proxy: None,
z_recall_rescue_contribution: None,
z_operator_burden_contribution: None,
z_semantic_persistence_contribution: None,
z_grouped_semantic_support: None,
z_violation_overdominance_penalty: None,
z_boundary: 1.0,
z_violation: -0.5,
z_ewma: 0.8,
missingness_penalty: 0.0,
candidate_score: 2.3,
score_breakdown: "".into(),
rank: 3,
},
// Rank 4: S222 (no persistent violation points).
FeatureRankingRow {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
feature_index: 221,
feature_name: "S222".into(),
dsfb_raw_boundary_points: 341,
dsfb_persistent_boundary_points: 300,
dsfb_raw_violation_points: 7,
dsfb_persistent_violation_points: 0,
ewma_alarm_points: 160,
threshold_alarm_points: 7,
pre_failure_run_hits: 11,
motif_precision_proxy: Some(0.55),
recall_rescue_contribution: None,
operator_burden_contribution: None,
semantic_persistence_contribution: None,
grouped_semantic_support: None,
violation_overdominance_penalty: None,
missing_fraction: 0.02,
z_pre_failure_run_hits: None,
z_motif_precision_proxy: None,
z_recall_rescue_contribution: None,
z_operator_burden_contribution: None,
z_semantic_persistence_contribution: None,
z_grouped_semantic_support: None,
z_violation_overdominance_penalty: None,
z_boundary: 1.1,
z_violation: -0.8,
z_ewma: 0.6,
missingness_penalty: 0.0,
candidate_score: 2.5,
score_breakdown: "".into(),
rank: 4,
},
]
}
/// Builds a hand-written `BenchmarkMetrics` fixture for the delta-target
/// assessment tests.
///
/// The summary reports 104 failure runs and 731 pass runs, with EWMA and
/// threshold pass-run nuisance rates of 0.9863294600136705 and
/// 0.974025974025974, the same baseline values the tests place on their
/// `CohortGridResult` rows. All alarm-point counters, density means and
/// lead-delta fields are zero or `None`, and `dsa_summary` plus the per-item
/// vectors are empty.
///
/// NOTE(review): `dataset_summary` uses small placeholder counts
/// (`run_count: 10`, `pass_count: 8`, `fail_count: 2`) that do not match the
/// 104/731 run totals below — presumably the assessment does not read them;
/// confirm if that ever changes.
fn sample_metrics_for_delta_target() -> BenchmarkMetrics {
BenchmarkMetrics {
summary: BenchmarkSummary {
dataset_summary: DatasetSummary {
run_count: 10,
feature_count: 3,
pass_count: 8,
fail_count: 2,
dataset_missing_fraction: 0.0,
healthy_pass_runs_requested: 3,
healthy_pass_runs_found: 3,
},
analyzable_feature_count: 3,
grammar_imputation_suppression_points: 0,
threshold_alarm_points: 0,
ewma_alarm_points: 0,
cusum_alarm_points: 0,
run_energy_alarm_points: 0,
pca_fdc_alarm_points: 0,
dsfb_raw_boundary_points: 0,
dsfb_persistent_boundary_points: 0,
dsfb_raw_violation_points: 0,
dsfb_persistent_violation_points: 0,
failure_runs: 104,
failure_runs_with_preceding_dsfb_raw_signal: 0,
failure_runs_with_preceding_dsfb_persistent_signal: 0,
failure_runs_with_preceding_dsfb_raw_boundary_signal: 0,
failure_runs_with_preceding_dsfb_persistent_boundary_signal: 0,
failure_runs_with_preceding_dsfb_raw_violation_signal: 0,
failure_runs_with_preceding_dsfb_persistent_violation_signal: 0,
failure_runs_with_preceding_ewma_signal: 104,
failure_runs_with_preceding_cusum_signal: 104,
failure_runs_with_preceding_run_energy_signal: 0,
failure_runs_with_preceding_pca_fdc_signal: 103,
failure_runs_with_preceding_threshold_signal: 104,
pass_runs: 731,
pass_runs_with_dsfb_raw_boundary_signal: 0,
pass_runs_with_dsfb_persistent_boundary_signal: 0,
pass_runs_with_dsfb_raw_violation_signal: 0,
pass_runs_with_dsfb_persistent_violation_signal: 0,
pass_runs_with_ewma_signal: 0,
pass_runs_with_cusum_signal: 0,
pass_runs_with_run_energy_signal: 0,
pass_runs_with_pca_fdc_signal: 0,
pass_runs_with_threshold_signal: 0,
pass_run_dsfb_raw_boundary_nuisance_rate: 0.9986329460,
pass_run_dsfb_persistent_boundary_nuisance_rate: 0.9904,
pass_run_dsfb_raw_violation_nuisance_rate: 0.9740259740,
pass_run_dsfb_persistent_violation_nuisance_rate: 0.7724,
pass_run_ewma_nuisance_rate: 0.9863294600136705,
pass_run_cusum_nuisance_rate: 1.0,
pass_run_run_energy_nuisance_rate: 0.5263,
pass_run_pca_fdc_nuisance_rate: 0.9316,
pass_run_threshold_nuisance_rate: 0.974025974025974,
},
lead_time_summary: LeadTimeSummary {
failure_runs_with_raw_boundary_lead: 103,
failure_runs_with_persistent_boundary_lead: 103,
failure_runs_with_raw_violation_lead: 104,
failure_runs_with_persistent_violation_lead: 104,
failure_runs_with_threshold_lead: 104,
failure_runs_with_ewma_lead: 104,
failure_runs_with_cusum_lead: 104,
failure_runs_with_run_energy_lead: 0,
failure_runs_with_pca_fdc_lead: 103,
mean_raw_boundary_lead_runs: Some(19.67),
mean_persistent_boundary_lead_runs: Some(19.54),
mean_raw_violation_lead_runs: Some(19.56),
mean_persistent_violation_lead_runs: Some(18.0),
mean_threshold_lead_runs: Some(19.557692307692307),
mean_ewma_lead_runs: Some(19.576923076923077),
mean_cusum_lead_runs: Some(19.58653846153846),
mean_run_energy_lead_runs: Some(16.31),
mean_pca_fdc_lead_runs: Some(19.009708737864077),
mean_raw_boundary_minus_cusum_delta_runs: None,
mean_raw_boundary_minus_run_energy_delta_runs: None,
mean_raw_boundary_minus_pca_fdc_delta_runs: None,
mean_raw_boundary_minus_threshold_delta_runs: None,
mean_raw_boundary_minus_ewma_delta_runs: None,
mean_persistent_boundary_minus_cusum_delta_runs: None,
mean_persistent_boundary_minus_run_energy_delta_runs: None,
mean_persistent_boundary_minus_pca_fdc_delta_runs: None,
mean_persistent_boundary_minus_threshold_delta_runs: None,
mean_persistent_boundary_minus_ewma_delta_runs: None,
mean_raw_violation_minus_cusum_delta_runs: None,
mean_raw_violation_minus_run_energy_delta_runs: None,
mean_raw_violation_minus_pca_fdc_delta_runs: None,
mean_raw_violation_minus_threshold_delta_runs: None,
mean_raw_violation_minus_ewma_delta_runs: None,
mean_persistent_violation_minus_cusum_delta_runs: None,
mean_persistent_violation_minus_run_energy_delta_runs: None,
mean_persistent_violation_minus_pca_fdc_delta_runs: None,
mean_persistent_violation_minus_threshold_delta_runs: None,
mean_persistent_violation_minus_ewma_delta_runs: None,
},
density_summary: DensitySummary {
density_window: 5,
mean_raw_boundary_density_failure: 0.0,
mean_raw_boundary_density_pass: 0.0,
mean_persistent_boundary_density_failure: 0.0,
mean_persistent_boundary_density_pass: 0.0,
mean_raw_violation_density_failure: 0.0,
mean_raw_violation_density_pass: 0.0,
mean_persistent_violation_density_failure: 0.0,
mean_persistent_violation_density_pass: 0.0,
mean_threshold_density_failure: 0.0,
mean_threshold_density_pass: 0.0,
mean_ewma_density_failure: 0.0,
mean_ewma_density_pass: 0.0,
mean_cusum_density_failure: 0.0,
mean_cusum_density_pass: 0.0,
},
boundary_episode_summary: BoundaryEpisodeSummary {
// Same raw episode count as `raw_boundary_episode_count` on the test rows.
raw_episode_count: 28607,
persistent_episode_count: 0,
mean_raw_episode_length: None,
mean_persistent_episode_length: None,
max_raw_episode_length: 0,
max_persistent_episode_length: 0,
raw_non_escalating_episode_fraction: None,
persistent_non_escalating_episode_fraction: None,
},
dsa_summary: None,
motif_metrics: Vec::new(),
per_failure_run_signals: Vec::new(),
density_metrics: Vec::new(),
feature_metrics: Vec::new(),
top_feature_indices: Vec::new(),
}
}
/// Building cohorts twice from equal rankings must serialize to identical
/// JSON; the result must also have four `top_4` members and a seed report
/// entry for S059.
#[test]
fn cohort_selection_is_deterministic() {
    let first_pass = build_feature_cohorts(&sample_ranking());
    let second_pass = build_feature_cohorts(&sample_ranking());

    let first_json = serde_json::to_value(&first_pass).unwrap();
    let second_json = serde_json::to_value(&second_pass).unwrap();
    assert_eq!(first_json, second_json);

    assert_eq!(first_pass.top_4.len(), 4);

    let reports_s059 = first_pass
        .seed_feature_report
        .iter()
        .any(|entry| entry.feature_name == "S059");
    assert!(reports_s059);
}
/// The seed-feature check built from the sample cohorts lists six requested
/// seed features, and its first report entry is S059 flagged as in the top 4.
#[test]
fn seed_feature_check_artifact_is_emitted_deterministically() {
    let cohorts = build_feature_cohorts(&sample_ranking());
    let artifact = build_seed_feature_check(&cohorts);

    assert_eq!(artifact.requested_seed_features.len(), 6);

    let leading_seed = &artifact.seed_feature_report[0];
    assert_eq!(leading_seed.feature_name, "S059");
    assert!(leading_seed.in_top_4);
}
/// Pins the on-disk format of the precursor-quality CSV.
///
/// Writes a single hand-built `CohortGridResult` through
/// `write_precursor_quality_csv` into a temporary directory, reads the file
/// back, and checks that it contains the expected leading header columns and
/// the expected leading data columns for the row.
#[test]
fn precursor_quality_csv_format_is_stable() {
let row = CohortGridResult {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
grid_row_id: 1,
feature_trace_config_id: 0,
cohort_name: "top_4".into(),
cohort_size: 4,
window: 5,
persistence_runs: 2,
alert_tau: 2.0,
corroborating_m: 2,
primary_run_signal: "signal".into(),
failure_recall: 10,
failure_runs: 12,
failure_recall_rate: 0.8333,
threshold_recall: 11,
ewma_recall: 11,
failure_recall_delta_vs_threshold: -1,
failure_recall_delta_vs_ewma: -1,
mean_lead_time_runs: Some(3.0),
median_lead_time_runs: Some(3.0),
threshold_mean_lead_time_runs: Some(2.0),
ewma_mean_lead_time_runs: Some(2.0),
mean_lead_delta_vs_threshold_runs: Some(1.0),
mean_lead_delta_vs_ewma_runs: Some(1.0),
pass_run_nuisance_proxy: 0.1,
numeric_pass_run_nuisance_proxy: 0.15,
ewma_nuisance: 0.2,
threshold_nuisance: 0.3,
pass_run_nuisance_delta_vs_ewma: -0.1,
pass_run_nuisance_delta_vs_threshold: -0.2,
pass_run_nuisance_delta_vs_numeric_dsa: -0.05,
raw_boundary_episode_count: 20,
dsa_episode_count: 4,
dsa_episodes_preceding_failure: 3,
mean_dsa_episode_length_runs: Some(2.0),
max_dsa_episode_length_runs: 5,
compression_ratio: Some(5.0),
precursor_quality: Some(0.75),
non_escalating_dsa_episode_fraction: Some(0.25),
feature_level_active_points: 8,
feature_level_alert_points: 4,
persistence_suppression_fraction: Some(0.5),
numeric_failure_recall: 11,
policy_vs_numeric_recall_delta: -1,
watch_point_count: 3,
review_point_count: 3,
escalate_point_count: 1,
investigation_point_count: 4,
numeric_investigation_point_count: 6,
silenced_point_count: 2,
rescued_point_count: 1,
rescued_watch_to_review_points: 1,
rescued_review_to_escalate_points: 0,
review_escalate_points_per_pass_run: 0.2,
numeric_alert_points_per_pass_run: 0.3,
review_escalate_episodes_per_pass_run: 0.1,
numeric_alert_episodes_per_pass_run: 0.15,
primary_success: true,
primary_success_reason: "ok".into(),
};
// The temp directory is removed when `temp` is dropped at the end of the test.
let temp = tempfile::tempdir().unwrap();
let path = temp.path().join("precursor_quality.csv");
write_precursor_quality_csv(&path, &[row]).unwrap();
let content = std::fs::read_to_string(path).unwrap();
// Header prefix, then the row prefix: cohort, W, K, tau (six decimals), m,
// followed by values matching raw episodes (20), DSA episodes (4), episodes
// preceding failure (3), precursor quality (0.75) and compression ratio (5.0).
assert!(content.contains("cohort_name,window,persistence_runs,alert_tau"));
assert!(content.contains("top_4,5,2,2.000000,2,20,4,3,0.750000,5.000000"));
}
/// Checks that `compute_delta_target_assessment` reports every target as
/// unmet for a configuration whose nuisance reduction is modest.
///
/// The optimized row has pass-run nuisance 0.7997 against an EWMA nuisance of
/// 0.9863 (relative reduction about 0.1892) and against a baseline DSA
/// nuisance of 0.8312 (relative reduction about 0.0378). The test asserts
/// that the primary, ideal and secondary targets are all reported unmet and
/// that both relative deltas are computed to within 1e-9.
///
/// NOTE(review): the "forty percent" in the name presumably refers to
/// `PRIMARY_DELTA_TARGET`, which is defined outside this view — confirm.
#[test]
fn delta_target_assessment_reports_unreached_forty_percent_goal() {
// Baseline (current-policy) row: recall 100/104, nuisance 0.8312.
let baseline_row = CohortGridResult {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
grid_row_id: 0,
feature_trace_config_id: 0,
cohort_name: "all_features".into(),
cohort_size: 100,
window: 10,
persistence_runs: 2,
alert_tau: 2.0,
corroborating_m: 1,
primary_run_signal: "signal".into(),
failure_recall: 100,
failure_runs: 104,
failure_recall_rate: 100.0 / 104.0,
threshold_recall: 104,
ewma_recall: 104,
failure_recall_delta_vs_threshold: -4,
failure_recall_delta_vs_ewma: -4,
mean_lead_time_runs: Some(18.7),
median_lead_time_runs: Some(20.0),
threshold_mean_lead_time_runs: Some(19.557692307692307),
ewma_mean_lead_time_runs: Some(19.576923076923077),
mean_lead_delta_vs_threshold_runs: Some(-0.8577),
mean_lead_delta_vs_ewma_runs: Some(-0.8769),
pass_run_nuisance_proxy: 0.8311688311688312,
numeric_pass_run_nuisance_proxy: 0.9330,
ewma_nuisance: 0.9863294600136705,
threshold_nuisance: 0.974025974025974,
pass_run_nuisance_delta_vs_ewma: -0.15516062884483928,
pass_run_nuisance_delta_vs_threshold: -0.1428571428571428,
pass_run_nuisance_delta_vs_numeric_dsa: -0.10183116883116884,
raw_boundary_episode_count: 28607,
dsa_episode_count: 65,
dsa_episodes_preceding_failure: 52,
mean_dsa_episode_length_runs: Some(17.0),
max_dsa_episode_length_runs: 110,
compression_ratio: Some(440.10769230769233),
precursor_quality: Some(0.8),
non_escalating_dsa_episode_fraction: Some(0.0),
feature_level_active_points: 0,
feature_level_alert_points: 0,
persistence_suppression_fraction: None,
numeric_failure_recall: 99,
policy_vs_numeric_recall_delta: 1,
watch_point_count: 0,
review_point_count: 0,
escalate_point_count: 0,
investigation_point_count: 3892,
numeric_investigation_point_count: 8014,
silenced_point_count: 0,
rescued_point_count: 0,
rescued_watch_to_review_points: 0,
rescued_review_to_escalate_points: 0,
review_escalate_points_per_pass_run: 2.515379357484621,
numeric_alert_points_per_pass_run: 5.187286397812714,
review_escalate_episodes_per_pass_run: 0.08133971291866028,
numeric_alert_episodes_per_pass_run: 0.05468215994531784,
primary_success: false,
primary_success_reason: "baseline".into(),
};
// Optimized row: recall 103/104, nuisance 0.7997.
let optimized_row = CohortGridResult {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
grid_row_id: 1,
feature_trace_config_id: 0,
cohort_name: "all_features".into(),
cohort_size: 100,
window: 10,
persistence_runs: 4,
alert_tau: 2.0,
corroborating_m: 1,
primary_run_signal: "signal".into(),
failure_recall: 103,
failure_runs: 104,
failure_recall_rate: 103.0 / 104.0,
threshold_recall: 104,
ewma_recall: 104,
failure_recall_delta_vs_threshold: -1,
failure_recall_delta_vs_ewma: -1,
mean_lead_time_runs: Some(17.980582524271846),
median_lead_time_runs: Some(20.0),
threshold_mean_lead_time_runs: Some(19.557692307692307),
ewma_mean_lead_time_runs: Some(19.576923076923077),
mean_lead_delta_vs_threshold_runs: Some(-1.7475728155339805),
mean_lead_delta_vs_ewma_runs: Some(-1.766990291262136),
pass_run_nuisance_proxy: 0.7997265892002734,
numeric_pass_run_nuisance_proxy: 0.9180,
ewma_nuisance: 0.9863294600136705,
threshold_nuisance: 0.974025974025974,
pass_run_nuisance_delta_vs_ewma: -0.1866028708133971,
pass_run_nuisance_delta_vs_threshold: -0.17429938482570062,
pass_run_nuisance_delta_vs_numeric_dsa: -0.11827341079972659,
raw_boundary_episode_count: 28607,
dsa_episode_count: 73,
dsa_episodes_preceding_failure: 57,
mean_dsa_episode_length_runs: Some(17.041095890410958),
max_dsa_episode_length_runs: 110,
compression_ratio: Some(391.8767123287671),
precursor_quality: Some(0.7808219178082192),
non_escalating_dsa_episode_fraction: Some(0.0),
feature_level_active_points: 0,
feature_level_alert_points: 0,
persistence_suppression_fraction: None,
numeric_failure_recall: 99,
policy_vs_numeric_recall_delta: 4,
watch_point_count: 0,
review_point_count: 0,
escalate_point_count: 0,
investigation_point_count: 3892,
numeric_investigation_point_count: 8014,
silenced_point_count: 0,
rescued_point_count: 57,
rescued_watch_to_review_points: 57,
rescued_review_to_escalate_points: 0,
review_escalate_points_per_pass_run: 2.515379357484621,
numeric_alert_points_per_pass_run: 5.187286397812714,
review_escalate_episodes_per_pass_run: 0.08133971291866028,
numeric_alert_episodes_per_pass_run: 0.05468215994531784,
primary_success: true,
primary_success_reason: "selected".into(),
};
let metrics = sample_metrics_for_delta_target();
// The optimized row is passed as the selected row and as the only member of
// both candidate slices; the baseline row is the comparison point.
let assessment = compute_delta_target_assessment(
&optimized_row,
std::slice::from_ref(&optimized_row),
std::slice::from_ref(&optimized_row),
&baseline_row,
&metrics,
);
assert!(!assessment.primary_target_met);
assert!(!assessment.ideal_target_met);
assert!(!assessment.secondary_target_met);
// (0.98633 - 0.79973) / 0.98633
assert!(
(assessment.selected_configuration.delta_nuisance_vs_ewma - 0.18918918918918917).abs()
< 1.0e-9
);
// (0.83117 - 0.79973) / 0.83117
assert!(
(assessment
.selected_configuration
.delta_nuisance_vs_current_dsa
- 0.037828947368421136)
.abs()
< 1.0e-9
);
}
/// Checks that, given two candidate rows with the same 103/104 recall, the
/// assessment's `best_recall_103_candidate` is the one with the larger
/// nuisance reduction versus EWMA.
///
/// `template_row` (nuisance 0.7997) doubles as the selected row; the
/// alternative `recall_aware` row differs only in the overridden fields and
/// has a higher nuisance (0.8387). The test expects the best candidate's
/// label to match the selected row and its delta versus EWMA to be about
/// 0.1892.
#[test]
fn delta_target_assessment_prefers_best_recall_preserving_delta_row() {
let template_row = CohortGridResult {
ranking_strategy: "compression_biased".into(),
ranking_formula: RANKING_FORMULA.into(),
grid_row_id: 1,
feature_trace_config_id: 0,
cohort_name: "all_features".into(),
cohort_size: 100,
window: 10,
persistence_runs: 4,
alert_tau: 2.0,
corroborating_m: 1,
primary_run_signal: "signal".into(),
failure_recall: 103,
failure_runs: 104,
failure_recall_rate: 103.0 / 104.0,
threshold_recall: 104,
ewma_recall: 104,
failure_recall_delta_vs_threshold: -1,
failure_recall_delta_vs_ewma: -1,
mean_lead_time_runs: Some(17.980582524271846),
median_lead_time_runs: Some(20.0),
threshold_mean_lead_time_runs: Some(19.557692307692307),
ewma_mean_lead_time_runs: Some(19.576923076923077),
mean_lead_delta_vs_threshold_runs: Some(-1.7475728155339805),
mean_lead_delta_vs_ewma_runs: Some(-1.766990291262136),
pass_run_nuisance_proxy: 0.7997265892002734,
numeric_pass_run_nuisance_proxy: 0.9180,
ewma_nuisance: 0.9863294600136705,
threshold_nuisance: 0.974025974025974,
pass_run_nuisance_delta_vs_ewma: -0.1866028708133971,
pass_run_nuisance_delta_vs_threshold: -0.17429938482570062,
pass_run_nuisance_delta_vs_numeric_dsa: -0.11827341079972659,
raw_boundary_episode_count: 28607,
dsa_episode_count: 73,
dsa_episodes_preceding_failure: 57,
mean_dsa_episode_length_runs: Some(17.041095890410958),
max_dsa_episode_length_runs: 110,
compression_ratio: Some(391.8767123287671),
precursor_quality: Some(0.7808219178082192),
non_escalating_dsa_episode_fraction: Some(0.0),
feature_level_active_points: 0,
feature_level_alert_points: 0,
persistence_suppression_fraction: None,
numeric_failure_recall: 99,
policy_vs_numeric_recall_delta: 4,
watch_point_count: 0,
review_point_count: 0,
escalate_point_count: 0,
investigation_point_count: 3892,
numeric_investigation_point_count: 8014,
silenced_point_count: 0,
rescued_point_count: 57,
rescued_watch_to_review_points: 57,
rescued_review_to_escalate_points: 0,
review_escalate_points_per_pass_run: 2.515379357484621,
numeric_alert_points_per_pass_run: 5.187286397812714,
review_escalate_episodes_per_pass_run: 0.08133971291866028,
numeric_alert_episodes_per_pass_run: 0.05468215994531784,
primary_success: true,
primary_success_reason: "selected".into(),
};
// Baseline: the template with recall lowered to 100/104 and nuisance raised
// to 0.8312; every field not listed is inherited from the template.
let baseline_row = CohortGridResult {
failure_recall: 100,
failure_recall_rate: 100.0 / 104.0,
failure_recall_delta_vs_threshold: -4,
failure_recall_delta_vs_ewma: -4,
mean_lead_time_runs: Some(18.7),
mean_lead_delta_vs_threshold_runs: Some(-0.8577),
mean_lead_delta_vs_ewma_runs: Some(-0.8769),
pass_run_nuisance_proxy: 0.8311688311688312,
numeric_pass_run_nuisance_proxy: 0.9330,
dsa_episode_count: 65,
compression_ratio: Some(440.10769230769233),
precursor_quality: Some(0.8),
numeric_failure_recall: 99,
policy_vs_numeric_recall_delta: 1,
rescued_point_count: 0,
rescued_watch_to_review_points: 0,
primary_success: false,
primary_success_reason: "baseline".into(),
..template_row.clone()
};
let selected_row = template_row.clone();
// Same recall as the selected row (inherited) but a higher nuisance, so its
// reduction versus EWMA is smaller.
let weaker_recall_preserving_row = CohortGridResult {
ranking_strategy: "recall_aware".into(),
persistence_runs: 2,
pass_run_nuisance_proxy: 0.8386876281613124,
pass_run_nuisance_delta_vs_ewma: -0.14764183185235812,
pass_run_nuisance_delta_vs_threshold: -0.13533834586466164,
pass_run_nuisance_delta_vs_numeric_dsa: -0.09432679900680764,
dsa_episode_count: 67,
compression_ratio: Some(426.97014925373134),
precursor_quality: Some(0.8059701492537313),
..template_row.clone()
};
let metrics = sample_metrics_for_delta_target();
let assessment = compute_delta_target_assessment(
&selected_row,
std::slice::from_ref(&selected_row),
&[selected_row.clone(), weaker_recall_preserving_row],
&baseline_row,
&metrics,
);
let best = assessment
.best_recall_103_candidate
.expect("best recall row");
assert_eq!(best.configuration, row_label(&selected_row));
assert!((best.delta_nuisance_vs_ewma - 0.18918918918918917).abs() < 1.0e-9);
}
}