use std::sync::atomic::{AtomicU64, Ordering};
use crate::config::PilotMetricsConfig;
/// The point in the pilot's decision pipeline at which a decision was made.
/// Used by [`PilotMetrics::record_decision`] to pick which per-point counter
/// to bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InterventionPoint {
    /// Counted in `start_guidance_calls`.
    Start,
    /// Counted in `fork_decisions`.
    Fork,
    /// Counted in `backtrack_calls`.
    Backtrack,
    /// Counted in `evaluate_calls`.
    Evaluate,
}
/// Reinterprets the raw IEEE-754 representation of `v` as a `u64`
/// (bit-for-bit, no numeric conversion).
///
/// NOTE(review): not referenced anywhere in this file — confirm it is used
/// elsewhere or consider removing it together with `u64_bits_to_f64`.
fn f64_to_u64_bits(v: f64) -> u64 {
    // Equivalent to `v.to_bits()`: same bytes, native endianness.
    u64::from_ne_bytes(v.to_ne_bytes())
}
/// Reinterprets the raw bits in `v` as an IEEE-754 `f64`
/// (bit-for-bit inverse of `f64_to_u64_bits`).
///
/// NOTE(review): not referenced anywhere in this file — confirm it is used
/// elsewhere or consider removing it together with `f64_to_u64_bits`.
fn u64_bits_to_f64(v: u64) -> f64 {
    // Equivalent to `f64::from_bits(v)`: same bytes, native endianness.
    f64::from_ne_bytes(v.to_ne_bytes())
}
/// Thread-safe counters describing pilot activity.
///
/// Every field is an `AtomicU64` updated with `Ordering::Relaxed`, so a
/// `PilotMetrics` can be shared across threads (e.g. behind an `Arc`)
/// without a lock. Relaxed ordering means individual counters are each
/// accurate, but a report taken while writers are active may mix counts
/// from slightly different instants.
#[derive(Debug, Default)]
pub struct PilotMetrics {
    // Total decisions recorded via `record_decision`.
    pub total_decisions: AtomicU64,
    // Per-intervention-point breakdowns of `total_decisions`.
    pub start_guidance_calls: AtomicU64,
    pub fork_decisions: AtomicU64,
    pub backtrack_calls: AtomicU64,
    pub evaluate_calls: AtomicU64,
    // Feedback tallies recorded via `record_feedback`.
    pub correct_decisions: AtomicU64,
    pub incorrect_decisions: AtomicU64,
    // Sum of confidences, each multiplied by 1_000_000 and truncated so a
    // fractional value can be accumulated in an integer atomic; paired
    // with `confidence_count` to derive the average in `generate_report`.
    pub confidence_sum_scaled: AtomicU64,
    pub confidence_count: AtomicU64,
    // Operational counters, each bumped by its dedicated `record_*` method.
    pub llm_calls: AtomicU64,
    pub interventions: AtomicU64,
    pub skipped_interventions: AtomicU64,
    pub budget_exhausted: AtomicU64,
    pub algorithm_fallbacks: AtomicU64,
}
impl PilotMetrics {
    /// Factor used to store fractional confidence values in an integer
    /// atomic; preserves six decimal digits of precision. Shared by
    /// `record_decision` (encode) and `generate_report` (decode) so the
    /// two can never drift apart.
    const CONFIDENCE_SCALE: f64 = 1_000_000.0;

    /// Creates a collector with every counter at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one pilot decision taken at `point` with the given
    /// `confidence` (expected in `[0.0, 1.0]`).
    ///
    /// Does nothing when `config.track_decisions` is disabled.
    ///
    /// Out-of-range confidences are clamped to `[0.0, 1.0]` before being
    /// accumulated: without the clamp, a single huge value (e.g. `1e300`)
    /// would saturate the `as u64` cast to `u64::MAX` and permanently
    /// corrupt `avg_confidence`. A NaN still contributes 0 via the
    /// saturating cast.
    pub fn record_decision(
        &self,
        confidence: f64,
        point: InterventionPoint,
        config: &PilotMetricsConfig,
    ) {
        if !config.track_decisions {
            return;
        }
        self.total_decisions.fetch_add(1, Ordering::Relaxed);
        // Exhaustive match: adding an `InterventionPoint` variant becomes a
        // compile error here instead of a silently uncounted bucket.
        match point {
            InterventionPoint::Start => {
                self.start_guidance_calls.fetch_add(1, Ordering::Relaxed);
            }
            InterventionPoint::Fork => {
                self.fork_decisions.fetch_add(1, Ordering::Relaxed);
            }
            InterventionPoint::Backtrack => {
                self.backtrack_calls.fetch_add(1, Ordering::Relaxed);
            }
            InterventionPoint::Evaluate => {
                self.evaluate_calls.fetch_add(1, Ordering::Relaxed);
            }
        }
        let scaled = (confidence.clamp(0.0, 1.0) * Self::CONFIDENCE_SCALE) as u64;
        self.confidence_sum_scaled
            .fetch_add(scaled, Ordering::Relaxed);
        self.confidence_count.fetch_add(1, Ordering::Relaxed);
    }

    /// Records whether an earlier decision turned out to be correct.
    /// Does nothing when `config.track_feedback` is disabled.
    pub fn record_feedback(&self, was_correct: bool, config: &PilotMetricsConfig) {
        if !config.track_feedback {
            return;
        }
        if was_correct {
            self.correct_decisions.fetch_add(1, Ordering::Relaxed);
        } else {
            self.incorrect_decisions.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Counts one call made to the LLM.
    pub fn record_llm_call(&self) {
        self.llm_calls.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one intervention that was actually performed.
    pub fn record_intervention(&self) {
        self.interventions.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one intervention opportunity that was deliberately skipped.
    pub fn record_skipped_intervention(&self) {
        self.skipped_interventions.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one occurrence of the intervention budget running out.
    pub fn record_budget_exhausted(&self) {
        self.budget_exhausted.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one fallback to the non-pilot algorithm.
    pub fn record_algorithm_fallback(&self) {
        self.algorithm_fallbacks.fetch_add(1, Ordering::Relaxed);
    }

    /// Resets every counter to zero.
    ///
    /// Each field is cleared with an independent relaxed store, so a reset
    /// racing with concurrent writers is not an atomic snapshot-and-clear.
    pub fn reset(&self) {
        self.total_decisions.store(0, Ordering::Relaxed);
        self.start_guidance_calls.store(0, Ordering::Relaxed);
        self.fork_decisions.store(0, Ordering::Relaxed);
        self.backtrack_calls.store(0, Ordering::Relaxed);
        self.evaluate_calls.store(0, Ordering::Relaxed);
        self.correct_decisions.store(0, Ordering::Relaxed);
        self.incorrect_decisions.store(0, Ordering::Relaxed);
        self.confidence_sum_scaled.store(0, Ordering::Relaxed);
        self.confidence_count.store(0, Ordering::Relaxed);
        self.llm_calls.store(0, Ordering::Relaxed);
        self.interventions.store(0, Ordering::Relaxed);
        self.skipped_interventions.store(0, Ordering::Relaxed);
        self.budget_exhausted.store(0, Ordering::Relaxed);
        self.algorithm_fallbacks.store(0, Ordering::Relaxed);
    }

    /// Produces a plain-value snapshot of the current counters, with
    /// `accuracy` (correct / total feedback) and `avg_confidence` derived
    /// on the fly; both derived values fall back to `0.0` when no samples
    /// have been recorded, avoiding a division by zero.
    pub fn generate_report(&self) -> PilotMetricsReport {
        let total_decisions = self.total_decisions.load(Ordering::Relaxed);
        let correct = self.correct_decisions.load(Ordering::Relaxed);
        let incorrect = self.incorrect_decisions.load(Ordering::Relaxed);
        let total_feedback = correct + incorrect;
        let confidence_count = self.confidence_count.load(Ordering::Relaxed);
        let confidence_sum_scaled = self.confidence_sum_scaled.load(Ordering::Relaxed);
        PilotMetricsReport {
            total_decisions,
            start_guidance_calls: self.start_guidance_calls.load(Ordering::Relaxed),
            fork_decisions: self.fork_decisions.load(Ordering::Relaxed),
            backtrack_calls: self.backtrack_calls.load(Ordering::Relaxed),
            evaluate_calls: self.evaluate_calls.load(Ordering::Relaxed),
            accuracy: if total_feedback > 0 {
                correct as f64 / total_feedback as f64
            } else {
                0.0
            },
            correct_decisions: correct,
            incorrect_decisions: incorrect,
            avg_confidence: if confidence_count > 0 {
                // Undo the fixed-point encoding applied in `record_decision`.
                (confidence_sum_scaled as f64 / Self::CONFIDENCE_SCALE)
                    / confidence_count as f64
            } else {
                0.0
            },
            llm_calls: self.llm_calls.load(Ordering::Relaxed),
            interventions: self.interventions.load(Ordering::Relaxed),
            skipped_interventions: self.skipped_interventions.load(Ordering::Relaxed),
            budget_exhausted: self.budget_exhausted.load(Ordering::Relaxed),
            algorithm_fallbacks: self.algorithm_fallbacks.load(Ordering::Relaxed),
        }
    }
}
/// Plain-value snapshot of [`PilotMetrics`], produced by
/// [`PilotMetrics::generate_report`]. Safe to clone and pass around; it
/// does not track further updates to the live counters.
#[derive(Debug, Clone)]
pub struct PilotMetricsReport {
    pub total_decisions: u64,
    pub start_guidance_calls: u64,
    pub fork_decisions: u64,
    pub backtrack_calls: u64,
    pub evaluate_calls: u64,
    // correct / (correct + incorrect); 0.0 when no feedback was recorded.
    pub accuracy: f64,
    pub correct_decisions: u64,
    pub incorrect_decisions: u64,
    // Mean recorded confidence; 0.0 when no decisions were recorded.
    pub avg_confidence: f64,
    pub llm_calls: u64,
    pub interventions: u64,
    pub skipped_interventions: u64,
    pub budget_exhausted: u64,
    pub algorithm_fallbacks: u64,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check: recorded decisions and feedback show up in the
    /// report, and the derived accuracy / average-confidence values are
    /// computed correctly.
    #[test]
    fn test_pilot_metrics_recording() {
        let cfg = PilotMetricsConfig::default();
        let metrics = PilotMetrics::new();

        let decisions = [
            (0.9, InterventionPoint::Start),
            (0.8, InterventionPoint::Fork),
            (0.7, InterventionPoint::Fork),
        ];
        for (confidence, point) in decisions {
            metrics.record_decision(confidence, point, &cfg);
        }
        metrics.record_feedback(true, &cfg);
        metrics.record_feedback(false, &cfg);

        let report = metrics.generate_report();
        assert_eq!(report.total_decisions, 3);
        assert_eq!(report.fork_decisions, 2);
        // 1 correct out of 2 feedback events.
        assert!((report.accuracy - 0.5).abs() < 0.01);
        // Mean of 0.9, 0.8, 0.7.
        assert!((report.avg_confidence - 0.8).abs() < 0.01);
    }
}