use crate::automation_v2::types::{
WorkflowLearningCandidate, WorkflowLearningCandidateKind, WorkflowLearningMetricsSnapshot,
};
/// Default for `WorkflowLearningPromotionPolicy::min_confidence`: a candidate whose confidence is
/// below this value is routed to human review instead of being auto-applied.
pub const DEFAULT_MIN_AUTO_APPLY_CONFIDENCE: f64 = 0.8;
/// Default for `WorkflowLearningPromotionPolicy::min_baseline_sample_size`: the smallest metrics
/// sample size that `evaluate_promotion` accepts as evidence for auto-apply.
pub const DEFAULT_MIN_BASELINE_SAMPLE_SIZE: usize = 5;
/// Default for `WorkflowLearningPromotionPolicy::max_human_intervention_rate`. At `0.0`, any
/// recorded human intervention in the sample sends the candidate to human review.
pub const DEFAULT_MAX_HUMAN_INTERVENTION_RATE: f64 = 0.0;
/// Default for `WorkflowLearningPromotionPolicy::post_apply_min_sample_size`: below this many
/// post-apply samples `evaluate_regression` reports `RegressionVerdict::Insufficient`.
pub const DEFAULT_POST_APPLY_MIN_SAMPLE_SIZE: usize = 3;
/// Outcome of `WorkflowLearningPromotionPolicy::evaluate_promotion` for a single candidate.
///
/// Every variant carries a static `reason_code`; the non-auto-apply variants additionally carry
/// a formatted, human-readable `reason`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PromotionDecision {
/// Every promotion gate passed and the candidate qualifies for auto-apply.
AutoApply { reason_code: &'static str },
/// A gate was not satisfied (auto-apply disabled, confidence, sample size or
/// human-intervention rate); the candidate awaits human review.
RequireHumanReview {
/// Static identifier of the gate that was not satisfied.
reason_code: &'static str,
/// Human-readable explanation, including the observed and required values where relevant.
reason: String,
},
/// The candidate is never auto-applied. In this file it is produced only for structural
/// (graph patch) candidates and candidates that need a plan bundle.
Block {
/// Static identifier of the blocking rule.
reason_code: &'static str,
/// Human-readable explanation of why the candidate is blocked.
reason: String,
},
}
impl PromotionDecision {
    /// Returns `true` only for [`PromotionDecision::AutoApply`].
    pub fn is_auto_apply(&self) -> bool {
        // Exhaustive on purpose: a new variant must decide explicitly which side it is on.
        match self {
            Self::AutoApply { .. } => true,
            Self::RequireHumanReview { .. } | Self::Block { .. } => false,
        }
    }

    /// Returns the static reason code carried by whichever variant this is.
    pub fn reason_code(&self) -> &'static str {
        match self {
            Self::AutoApply { reason_code } => *reason_code,
            Self::RequireHumanReview { reason_code, .. } => *reason_code,
            Self::Block { reason_code, .. } => *reason_code,
        }
    }
}
/// Outcome of `WorkflowLearningPromotionPolicy::evaluate_regression`, which compares metrics
/// captured before a change was applied with the latest metrics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RegressionVerdict {
/// The post-apply sample size is below the policy minimum, so no comparison was made.
Insufficient,
/// Neither the completion rate nor the validation pass rate fell below its baseline by more
/// than the policy's regression margin.
Healthy,
/// One of the compared rates fell below its baseline by more than the regression margin.
Regressed {
/// Static identifier of the metric that regressed.
reason_code: &'static str,
/// Human-readable explanation containing the baseline and latest values.
reason: String,
},
}
impl RegressionVerdict {
    /// Returns `true` only for [`RegressionVerdict::Regressed`].
    pub fn is_regressed(&self) -> bool {
        // Exhaustive on purpose: a new variant must be classified explicitly.
        match self {
            Self::Regressed { .. } => true,
            Self::Insufficient | Self::Healthy => false,
        }
    }
}
/// Thresholds that decide whether a workflow-learning candidate may be auto-applied and
/// whether an applied change has regressed.
#[derive(Debug, Clone, PartialEq)]
pub struct WorkflowLearningPromotionPolicy {
/// Master switch: when `false`, `evaluate_promotion` never returns `AutoApply`.
pub auto_apply_enabled: bool,
/// Minimum candidate confidence required for auto-apply.
pub min_confidence: f64,
/// Minimum metrics sample size required for auto-apply.
pub min_baseline_sample_size: usize,
/// Highest human-intervention rate (interventions / sample size) still eligible for auto-apply.
pub max_human_intervention_rate: f64,
/// Minimum number of post-apply samples before `evaluate_regression` compares metrics.
pub post_apply_min_sample_size: usize,
/// Tolerance added to the latest rate before it is compared with the baseline rate in
/// `evaluate_regression`; drops within this margin are not reported as regressions.
pub regression_margin: f64,
}
impl Default for WorkflowLearningPromotionPolicy {
fn default() -> Self {
Self {
auto_apply_enabled: false,
min_confidence: DEFAULT_MIN_AUTO_APPLY_CONFIDENCE,
min_baseline_sample_size: DEFAULT_MIN_BASELINE_SAMPLE_SIZE,
max_human_intervention_rate: DEFAULT_MAX_HUMAN_INTERVENTION_RATE,
post_apply_min_sample_size: DEFAULT_POST_APPLY_MIN_SAMPLE_SIZE,
regression_margin: f64::EPSILON,
}
}
}
/// Reads `key` from the environment as a boolean flag.
///
/// The value is trimmed and compared case-insensitively (ASCII). `1`, `true`, `yes` and `on`
/// give `Some(true)`; `0`, `false`, `no` and `off` give `Some(false)`. A variable that cannot be
/// read, or any other value, gives `None`.
fn env_bool(key: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];
    const FALSY: [&str; 4] = ["0", "false", "no", "off"];

    let raw = std::env::var(key).ok()?;
    let normalized = raw.trim().to_ascii_lowercase();
    let token = normalized.as_str();

    if TRUTHY.contains(&token) {
        Some(true)
    } else if FALSY.contains(&token) {
        Some(false)
    } else {
        None
    }
}
/// Reads `key` from the environment as an `f64`.
///
/// The value is trimmed before parsing. Returns `None` when the variable cannot be read, when
/// the value does not parse as a float, or when it parses to NaN.
fn env_f64(key: &str) -> Option<f64> {
    std::env::var(key)
        .ok()
        .and_then(|v| v.trim().parse::<f64>().ok())
        // `"NaN"` is valid float syntax, but every ordered comparison against NaN is false, so a
        // NaN threshold or margin would silently disable the check that uses it. Treat it as
        // unset so the caller falls back to its default.
        .filter(|v| !v.is_nan())
}
/// Reads `key` from the environment as a `usize`.
///
/// The value is trimmed before parsing. Returns `None` when the variable cannot be read or the
/// value does not parse as a `usize`.
fn env_usize(key: &str) -> Option<usize> {
    let raw = std::env::var(key).ok()?;
    raw.trim().parse::<usize>().ok()
}
impl WorkflowLearningPromotionPolicy {
    /// Builds a policy from `TANDEM_WORKFLOW_LEARNING_*` environment variables.
    ///
    /// Each field is read through `env_bool` / `env_f64` / `env_usize`; whenever the helper
    /// returns `None` the field keeps its value from [`Default::default`].
    ///
    /// | Field | Variable |
    /// |---|---|
    /// | `auto_apply_enabled` | `TANDEM_WORKFLOW_LEARNING_AUTO_APPLY` |
    /// | `min_confidence` | `TANDEM_WORKFLOW_LEARNING_MIN_CONFIDENCE` |
    /// | `min_baseline_sample_size` | `TANDEM_WORKFLOW_LEARNING_MIN_BASELINE_SAMPLE` |
    /// | `max_human_intervention_rate` | `TANDEM_WORKFLOW_LEARNING_MAX_HUMAN_INTERVENTION_RATE` |
    /// | `post_apply_min_sample_size` | `TANDEM_WORKFLOW_LEARNING_POST_APPLY_MIN_SAMPLE` |
    /// | `regression_margin` | `TANDEM_WORKFLOW_LEARNING_REGRESSION_MARGIN` |
    pub fn from_env() -> Self {
        let defaults = Self::default();
        Self {
            auto_apply_enabled: env_bool("TANDEM_WORKFLOW_LEARNING_AUTO_APPLY")
                .unwrap_or(defaults.auto_apply_enabled),
            min_confidence: env_f64("TANDEM_WORKFLOW_LEARNING_MIN_CONFIDENCE")
                .unwrap_or(defaults.min_confidence),
            min_baseline_sample_size: env_usize("TANDEM_WORKFLOW_LEARNING_MIN_BASELINE_SAMPLE")
                .unwrap_or(defaults.min_baseline_sample_size),
            max_human_intervention_rate: env_f64(
                "TANDEM_WORKFLOW_LEARNING_MAX_HUMAN_INTERVENTION_RATE",
            )
            .unwrap_or(defaults.max_human_intervention_rate),
            post_apply_min_sample_size: env_usize("TANDEM_WORKFLOW_LEARNING_POST_APPLY_MIN_SAMPLE")
                .unwrap_or(defaults.post_apply_min_sample_size),
            regression_margin: env_f64("TANDEM_WORKFLOW_LEARNING_REGRESSION_MARGIN")
                .unwrap_or(defaults.regression_margin),
        }
    }

    /// Returns `true` for candidate kinds treated as structural changes (currently only
    /// `GraphPatch`).
    fn kind_is_structural(kind: WorkflowLearningCandidateKind) -> bool {
        matches!(kind, WorkflowLearningCandidateKind::GraphPatch)
    }

    /// Human interventions per sample.
    ///
    /// An empty sample yields `1.0`, the most pessimistic rate, rather than dividing by zero.
    fn human_intervention_rate(metrics: &WorkflowLearningMetricsSnapshot) -> f64 {
        if metrics.sample_size == 0 {
            return 1.0;
        }
        metrics.human_intervention_count as f64 / metrics.sample_size as f64
    }

    /// Decides whether `candidate` may be auto-applied given the observed `metrics`.
    ///
    /// Gates are checked in this order and the first one that is not satisfied determines the
    /// result:
    ///
    /// 1. Structural kinds and candidates with `needs_plan_bundle` → `Block`
    ///    (`structural_change_requires_human`), regardless of `auto_apply_enabled`.
    /// 2. `auto_apply_enabled` is `false` → `RequireHumanReview` (`auto_apply_disabled`).
    /// 3. Confidence below `min_confidence` → `RequireHumanReview` (`insufficient_confidence`).
    /// 4. Sample size below `min_baseline_sample_size` → `RequireHumanReview`
    ///    (`insufficient_evidence`).
    /// 5. Human-intervention rate above `max_human_intervention_rate` → `RequireHumanReview`
    ///    (`active_human_steering`).
    ///
    /// Otherwise the result is `AutoApply` (`thresholds_met`).
    ///
    /// Both float gates tolerate an `f64::EPSILON` difference at the boundary and fail closed on
    /// NaN: a NaN confidence or NaN threshold never leads to `AutoApply`.
    pub fn evaluate_promotion(
        &self,
        candidate: &WorkflowLearningCandidate,
        metrics: &WorkflowLearningMetricsSnapshot,
    ) -> PromotionDecision {
        if Self::kind_is_structural(candidate.kind) || candidate.needs_plan_bundle {
            return PromotionDecision::Block {
                reason_code: "structural_change_requires_human",
                reason: "graph/structural patches and plan-bundle changes are never auto-applied"
                    .to_string(),
            };
        }
        if !self.auto_apply_enabled {
            return PromotionDecision::RequireHumanReview {
                reason_code: "auto_apply_disabled",
                reason: "auto-apply is disabled; candidate awaits human review".to_string(),
            };
        }

        // Phrased as "passes the gate" rather than "fails the gate": comparisons with NaN are
        // always false, so a NaN on either side lands in the human-review branch instead of
        // slipping through to auto-apply.
        let confidence_ok = candidate.confidence + f64::EPSILON >= self.min_confidence;
        if !confidence_ok {
            return PromotionDecision::RequireHumanReview {
                reason_code: "insufficient_confidence",
                reason: format!(
                    "confidence {:.3} is below the auto-apply threshold {:.3}",
                    candidate.confidence, self.min_confidence
                ),
            };
        }
        if metrics.sample_size < self.min_baseline_sample_size {
            return PromotionDecision::RequireHumanReview {
                reason_code: "insufficient_evidence",
                reason: format!(
                    "sample size {} is below the minimum {} required for auto-apply",
                    metrics.sample_size, self.min_baseline_sample_size
                ),
            };
        }

        let intervention_rate = Self::human_intervention_rate(metrics);
        // Same fail-closed phrasing as the confidence gate.
        let within_ceiling =
            intervention_rate <= self.max_human_intervention_rate + f64::EPSILON;
        if !within_ceiling {
            return PromotionDecision::RequireHumanReview {
                reason_code: "active_human_steering",
                reason: format!(
                    "human-intervention rate {:.3} exceeds the auto-apply ceiling {:.3}",
                    intervention_rate, self.max_human_intervention_rate
                ),
            };
        }
        PromotionDecision::AutoApply {
            reason_code: "thresholds_met",
        }
    }

    /// Compares `latest` metrics against the pre-apply `baseline`.
    ///
    /// Returns `Insufficient` while `post_apply_sample_size` is below
    /// `post_apply_min_sample_size`. Otherwise reports `Regressed` when the completion rate, or
    /// failing that the validation pass rate, fell below its baseline by more than
    /// `regression_margin`; the completion rate is checked first. Returns `Healthy` when neither
    /// did.
    ///
    /// NOTE(review): a NaN rate or margin makes the `<` comparisons false, so such inputs are
    /// reported as `Healthy`. Confirm whether callers can ever pass NaN here.
    pub fn evaluate_regression(
        &self,
        baseline: &WorkflowLearningMetricsSnapshot,
        latest: &WorkflowLearningMetricsSnapshot,
        post_apply_sample_size: usize,
    ) -> RegressionVerdict {
        if post_apply_sample_size < self.post_apply_min_sample_size {
            return RegressionVerdict::Insufficient;
        }
        if latest.completion_rate + self.regression_margin < baseline.completion_rate {
            return RegressionVerdict::Regressed {
                reason_code: "completion_rate_regressed",
                reason: format!(
                    "completion rate fell from {:.3} to {:.3} after apply",
                    baseline.completion_rate, latest.completion_rate
                ),
            };
        }
        if latest.validation_pass_rate + self.regression_margin < baseline.validation_pass_rate {
            return RegressionVerdict::Regressed {
                reason_code: "validation_pass_rate_regressed",
                reason: format!(
                    "validation pass rate fell from {:.3} to {:.3} after apply",
                    baseline.validation_pass_rate, latest.validation_pass_rate
                ),
            };
        }
        RegressionVerdict::Healthy
    }
}
#[cfg(test)]
mod tests;