use crate::error::Result;
use crate::verification::types::*;
/// Combines the signals carried by a `VerificationResult` into a single
/// confidence score and maps that score to a `ValidationRecommendation`.
pub struct ConfidenceScorer {
/// Supplies `auto_accept_threshold` and `review_threshold`, the two cut-offs
/// the scorer compares a confidence value against.
config: VerificationConfig,
}
impl ConfidenceScorer {
    /// Weight applied to `VerificationResult::confidence` (reported as
    /// `aime_accuracy` in the breakdown).
    const AIME_WEIGHT: f64 = 0.40;
    /// Weight applied to the share of reasoning steps without issues.
    const STEP_CONSISTENCY_WEIGHT: f64 = 0.30;
    /// Weight applied to the theorem coverage score.
    const THEOREM_COVERAGE_WEIGHT: f64 = 0.20;
    /// Weight applied to the cross-model agreement.
    const CROSS_MODEL_WEIGHT: f64 = 0.10;
    /// Agreement assumed when no cross-model validation is present.
    const DEFAULT_CROSS_MODEL_AGREEMENT: f64 = 0.5;

    /// Creates a scorer that uses `VerificationConfig::default()`.
    pub fn new() -> Self {
        Self {
            config: VerificationConfig::default(),
        }
    }

    /// Creates a scorer that uses the supplied configuration.
    pub fn with_config(config: VerificationConfig) -> Self {
        Self { config }
    }

    /// Returns the overall confidence for `verification`, in `[0.0, 1.0]`.
    ///
    /// The score is the weighted sum of four components: the reported
    /// confidence (40%), step consistency (30%), theorem coverage (20%) and
    /// cross-model agreement (10%). A NaN sum is reported as `0.0`.
    pub fn score(&self, verification: &VerificationResult) -> f64 {
        Self::combine(
            verification.confidence,
            self.calculate_step_consistency(verification),
            verification.details.theorem_coverage.coverage_score,
            self.extract_cross_model_agreement(&verification.details.cross_model_validation),
        )
    }

    /// Weighted sum of the four components, limited to `[0.0, 1.0]`.
    fn combine(
        aime_accuracy: f64,
        step_consistency: f64,
        theorem_coverage: f64,
        cross_model_agreement: f64,
    ) -> f64 {
        let weighted = Self::AIME_WEIGHT * aime_accuracy
            + Self::STEP_CONSISTENCY_WEIGHT * step_consistency
            + Self::THEOREM_COVERAGE_WEIGHT * theorem_coverage
            + Self::CROSS_MODEL_WEIGHT * cross_model_agreement;

        // A NaN component must not be reported as full confidence: `recommend`
        // already sends NaN to review (every `>=` comparison is false), so the
        // numeric score takes the same conservative side.
        if weighted.is_nan() {
            0.0
        } else {
            weighted.clamp(0.0, 1.0)
        }
    }

    /// Fraction of steps that are valid, or `0.0` when no steps were recorded.
    fn calculate_step_consistency(&self, verification: &VerificationResult) -> f64 {
        let details = &verification.details;
        let total_steps = details.valid_steps + details.steps_with_issues;
        if total_steps == 0 {
            return 0.0;
        }
        details.valid_steps as f64 / total_steps as f64
    }

    /// Agreement of the cross-model validation, or a neutral `0.5` when absent.
    fn extract_cross_model_agreement(&self, validation: &Option<CrossModelValidation>) -> f64 {
        validation
            .as_ref()
            .map_or(Self::DEFAULT_CROSS_MODEL_AGREEMENT, |v| v.agreement)
    }

    /// Maps a confidence value to a recommendation using the configured thresholds.
    ///
    /// * `confidence >= auto_accept_threshold` yields `Accept`.
    /// * `confidence >= review_threshold` yields `AcceptWithAnnotation`.
    /// * anything else (including NaN) yields `ReviewRequired`.
    ///
    /// Percentages in the generated messages are rounded to two decimals so
    /// floating-point noise such as `56.99999999999999` never reaches the reader.
    pub fn recommend(&self, confidence: f64) -> ValidationRecommendation {
        let auto_accept = self.config.auto_accept_threshold;
        let review = self.config.review_threshold;

        if confidence >= auto_accept {
            ValidationRecommendation::Accept
        } else if confidence >= review {
            ValidationRecommendation::AcceptWithAnnotation {
                reason: format!(
                    "Confidence {:.2}% below auto-accept threshold of {:.2}%",
                    confidence * 100.0,
                    auto_accept * 100.0
                ),
            }
        } else {
            ValidationRecommendation::ReviewRequired {
                reasons: vec![format!(
                    "Confidence {:.2}% below review threshold of {:.2}%",
                    confidence * 100.0,
                    review * 100.0
                )],
            }
        }
    }

    /// Returns every component of the score together with the overall
    /// confidence, the resulting recommendation and the threshold checks.
    pub fn score_breakdown(&self, verification: &VerificationResult) -> ConfidenceBreakdown {
        let aime_accuracy = verification.confidence;
        let step_consistency = self.calculate_step_consistency(verification);
        let theorem_coverage = verification.details.theorem_coverage.coverage_score;
        let cross_model_agreement =
            self.extract_cross_model_agreement(&verification.details.cross_model_validation);

        // Computed once and reused for the recommendation and both threshold flags.
        let overall_confidence = Self::combine(
            aime_accuracy,
            step_consistency,
            theorem_coverage,
            cross_model_agreement,
        );

        ConfidenceBreakdown {
            aime_accuracy,
            step_consistency,
            theorem_coverage,
            cross_model_agreement,
            overall_confidence,
            recommendation: self.recommend(overall_confidence),
            meets_auto_accept: overall_confidence >= self.config.auto_accept_threshold,
            meets_annotation_threshold: overall_confidence >= self.config.review_threshold,
        }
    }
}
/// Per-component view of a confidence score, as produced by
/// `ConfidenceScorer::score_breakdown`.
///
/// The field descriptions below state how `score_breakdown` fills each field;
/// all fields are public, so values built elsewhere may not follow them.
#[derive(Debug, Clone)]
pub struct ConfidenceBreakdown {
/// Copy of `VerificationResult::confidence`; weighted 40% in the overall score.
pub aime_accuracy: f64,
/// Valid steps divided by all recorded steps (`0.0` when there are none); weighted 30%.
pub step_consistency: f64,
/// Copy of `details.theorem_coverage.coverage_score`; weighted 20%.
pub theorem_coverage: f64,
/// Agreement of the cross-model validation, or `0.5` when absent; weighted 10%.
pub cross_model_agreement: f64,
/// Result of `ConfidenceScorer::score` for the same verification.
pub overall_confidence: f64,
/// Result of `ConfidenceScorer::recommend` for `overall_confidence`.
pub recommendation: ValidationRecommendation,
/// Whether `overall_confidence` reaches the scorer's `auto_accept_threshold`.
pub meets_auto_accept: bool,
/// Whether `overall_confidence` reaches the scorer's `review_threshold`.
pub meets_annotation_threshold: bool,
}
/// Renders the breakdown as a multi-line, human-readable report.
///
/// Implemented as `Display` instead of an inherent `to_string` method, so the
/// type works with `{}` in format strings while `breakdown.to_string()` keeps
/// returning the same text through the standard `ToString` blanket impl.
impl std::fmt::Display for ConfidenceBreakdown {
    /// Writes the component percentages, the overall confidence, the
    /// recommendation and the threshold checks, one item per line, ending
    /// with a trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE(review): the threshold percentages printed below are fixed and
        // are not read from the scorer's `VerificationConfig`; a breakdown
        // produced with custom thresholds will show these values regardless.
        const AUTO_ACCEPT_PERCENT: f64 = 90.0;
        const ANNOTATION_PERCENT: f64 = 75.0;
        const REVIEW_PERCENT: f64 = 75.0;

        writeln!(f, "Confidence Breakdown:")?;
        writeln!(f, "===================")?;
        writeln!(f, "AIME Accuracy (40%): {:.2}%", self.aime_accuracy * 100.0)?;
        writeln!(
            f,
            "Step Consistency (30%): {:.2}%",
            self.step_consistency * 100.0
        )?;
        writeln!(
            f,
            "Theorem Coverage (20%): {:.2}%",
            self.theorem_coverage * 100.0
        )?;
        writeln!(
            f,
            "Cross-Model Agreement (10%): {:.2}%",
            self.cross_model_agreement * 100.0
        )?;
        writeln!(
            f,
            "Overall Confidence: {:.2}%",
            self.overall_confidence * 100.0
        )?;
        writeln!(f, "Recommendation: {}", self.recommendation)?;
        writeln!(f, "Thresholds:")?;
        writeln!(
            f,
            "- Auto-accept: {:.2}% (met: {})",
            AUTO_ACCEPT_PERCENT, self.meets_auto_accept
        )?;
        writeln!(
            f,
            "- Annotation: {:.2}% (met: {})",
            ANNOTATION_PERCENT, self.meets_annotation_threshold
        )?;
        // The review line is reported as "met" exactly when the annotation
        // threshold was missed.
        writeln!(
            f,
            "- Review: {:.2}% (met: {})",
            REVIEW_PERCENT, !self.meets_annotation_threshold
        )
    }
}
impl Default for ConfidenceScorer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Recommendation a default-configured scorer gives for `confidence`.
    fn recommendation_for(confidence: f64) -> ValidationRecommendation {
        ConfidenceScorer::new().recommend(confidence)
    }

    /// A scorer built with `new` carries the default auto-accept threshold.
    #[test]
    fn test_confidence_scorer_creation() {
        let default_scorer = ConfidenceScorer::new();
        assert_eq!(default_scorer.config.auto_accept_threshold, 0.90);
    }

    /// The rendered breakdown contains the expected percentages and label.
    #[test]
    fn test_confidence_breakdown_formatting() {
        let rendered = ConfidenceBreakdown {
            aime_accuracy: 0.91,
            step_consistency: 0.95,
            theorem_coverage: 0.88,
            cross_model_agreement: 0.80,
            overall_confidence: 0.90,
            recommendation: ValidationRecommendation::Accept,
            meets_auto_accept: true,
            meets_annotation_threshold: true,
        }
        .to_string();

        for fragment in ["91.00%", "90.00%", "ACCEPT"] {
            assert!(rendered.contains(fragment));
        }
    }

    /// A confidence above the default auto-accept threshold is accepted.
    #[test]
    fn test_recommend_accept() {
        let outcome = recommendation_for(0.95);
        assert!(matches!(outcome, ValidationRecommendation::Accept));
    }

    /// A confidence of 0.82 is expected to be accepted with an annotation.
    #[test]
    fn test_recommend_annotation() {
        let outcome = recommendation_for(0.82);
        assert!(matches!(
            outcome,
            ValidationRecommendation::AcceptWithAnnotation { .. }
        ));
    }

    /// A confidence of 0.65 is expected to require review.
    #[test]
    fn test_recommend_review() {
        let outcome = recommendation_for(0.65);
        assert!(matches!(
            outcome,
            ValidationRecommendation::ReviewRequired { .. }
        ));
    }
}