use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use terraphim_spawner::health::HealthStatus;
use terraphim_spawner::output::OutputEvent;
use tokio::sync::mpsc;
use crate::config::NightwatchConfig;
/// A single claim together with the evidence offered for it, as carried in the
/// `claims` list of a [`ReasoningCertificate`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Claim {
/// The statement being asserted.
pub claim: String,
/// Evidence offered in support of the statement.
pub evidence: String,
/// Optional dimension label attached to the claim.
// NOTE(review): the meaning of "dimension" is not established in this file — confirm with the producer.
pub dimension: Option<String>,
}
/// Structured record of the reasoning behind an output.
///
/// The `Default` value (empty lists, empty conclusion, zero confidence) does not pass
/// [`validate_certificate`].
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ReasoningCertificate {
/// Premises the reasoning starts from; validation requires at least two.
pub premises: Vec<String>,
/// Claims made by the reasoning; validation requires at least one.
pub claims: Vec<Claim>,
/// Edge cases that were considered; may be empty.
pub edge_cases: Vec<String>,
/// Final conclusion; validation requires it to be non-empty.
pub formal_conclusion: String,
/// Confidence value; validation requires it to be strictly greater than 0.0.
// NOTE(review): presumably on a 0.0..=1.0 scale (scoring compares against 0.8) — no range is enforced here.
pub confidence: f64,
}
/// Reports whether `cert` meets the minimum structural requirements.
///
/// A certificate is accepted only when it has at least two premises, at least one
/// claim, a non-empty formal conclusion, and a confidence strictly greater than zero
/// (a NaN confidence therefore fails).
pub fn validate_certificate(cert: &ReasoningCertificate) -> bool {
    if cert.premises.len() < 2 {
        return false;
    }
    if cert.claims.is_empty() || cert.formal_conclusion.is_empty() {
        return false;
    }
    cert.confidence > 0.0
}
/// Outcome of comparing the certificate-based score with the structure-based score,
/// as produced by [`dual_panel_evaluate`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DualPanelResult {
/// Score derived from the reasoning certificate (0.0 when none was supplied).
pub panel_a_score: f64,
/// Score derived from structural markers found in the output text.
pub panel_b_score: f64,
/// `1.0 - |panel_a_score - panel_b_score|`.
pub agreement: f64,
/// `true` when `agreement` is below 0.5.
pub drift_detected: bool,
/// Human-readable summary of both scores, the agreement and the verdict.
pub details: String,
}
pub fn dual_panel_evaluate(
output: &str,
certificate: Option<&ReasoningCertificate>,
) -> DualPanelResult {
let panel_a_score = if let Some(cert) = certificate {
calculate_certificate_score(cert)
} else {
0.0
};
let panel_b_score = calculate_structure_score(output);
let agreement = 1.0 - (panel_a_score - panel_b_score).abs();
let drift_detected = agreement < 0.5;
let details = format!(
"Panel A (certificate): {:.2}, Panel B (structure): {:.2}, Agreement: {:.2} - {}",
panel_a_score,
panel_b_score,
agreement,
if drift_detected {
"DRIFT DETECTED: panels disagree significantly"
} else {
"No drift: panels agree"
}
);
DualPanelResult {
panel_a_score,
panel_b_score,
agreement,
drift_detected,
details,
}
}
/// Scores a reasoning certificate in the range `0.0..=1.0`.
///
/// An invalid certificate scores 0.0. A valid one starts at 0.5 and earns bonuses:
/// +0.1 for more than two premises, +0.1 for more than one claim, +0.1 for any edge
/// case, and +0.2 for a confidence above 0.8.
fn calculate_certificate_score(cert: &ReasoningCertificate) -> f64 {
    if !validate_certificate(cert) {
        return 0.0;
    }

    // Bonuses are applied strictly in this order so the floating-point sum is
    // reproducible down to the last bit.
    let bonuses: [(bool, f64); 4] = [
        (cert.premises.len() > 2, 0.1),
        (cert.claims.len() > 1, 0.1),
        (!cert.edge_cases.is_empty(), 0.1),
        (cert.confidence > 0.8, 0.2),
    ];

    let total = bonuses
        .iter()
        .filter(|entry| entry.0)
        .fold(0.5_f64, |running, entry| running + entry.1);

    total.min(1.0)
}
/// Scores the visible structure of `output` in the range `0.0..=1.0`.
///
/// Matching is case-insensitive substring search. The score is the sum of:
/// +0.3 if the text contains a `##` heading marker, +0.3 if it contains a reasoning
/// marker, +0.3 if it contains a conclusion marker, and +0.1 if `output` is at least
/// 100 bytes long.
fn calculate_structure_score(output: &str) -> f64 {
    const REASONING_MARKERS: [&str; 4] = ["evidence:", "because", "since", "given that"];
    const CONCLUSION_MARKERS: [&str; 4] = ["conclusion:", "therefore", "thus", "in conclusion"];

    /// True when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    let text = output.to_lowercase();

    // Any "###" heading also contains "##", so a single check covers both.
    let components: [(bool, f64); 4] = [
        (text.contains("##"), 0.3),
        (contains_any(&text, &REASONING_MARKERS), 0.3),
        (contains_any(&text, &CONCLUSION_MARKERS), 0.3),
        (output.len() >= 100, 0.1),
    ];

    // Components are summed in declaration order to keep the result bit-stable.
    let total = components
        .iter()
        .filter(|entry| entry.0)
        .fold(0.0_f64, |running, entry| running + entry.1);

    total.min(1.0)
}
/// Snapshot of the per-agent ratios that feed the drift score.
#[derive(Debug, Clone, Default)]
pub struct DriftMetrics {
/// Error lines divided by total observed lines; 0.0 when no lines were observed.
pub error_rate: f64,
/// `1.0 - error_rate`; 1.0 when no lines were observed.
pub command_success_rate: f64,
/// Healthy checks divided by total health checks; 1.0 when no checks were recorded.
pub health_score: f64,
/// Total tokens divided by total cost in USD; 0.0 when the cost is not positive.
pub cost_efficiency: f64,
/// Spent whole cents divided by the budget in cents; 0.0 without a positive budget.
pub budget_exhaustion_rate: f64,
/// Number of observed lines plus number of health checks.
pub sample_count: u64,
}
/// Drift evaluation for one agent: the weighted score, the metrics it was computed
/// from, and the correction level the score was classified into.
#[derive(Debug, Clone)]
pub struct DriftScore {
/// Name of the agent this score belongs to.
pub agent_name: String,
/// Weighted drift score computed from `metrics` and the monitor configuration.
pub score: f64,
/// Metrics snapshot the score was computed from.
pub metrics: DriftMetrics,
/// Correction level obtained by comparing `score` with the configured thresholds.
pub level: CorrectionLevel,
}
/// Severity classification of a drift score.
///
/// The derived ordering follows declaration order, so
/// `Normal < Minor < Moderate < Severe < Critical`; do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum CorrectionLevel {
/// Score below every configured threshold; no alert is emitted.
Normal,
/// Score at or above the minor threshold.
Minor,
/// Score at or above the moderate threshold.
Moderate,
/// Score at or above the severe threshold.
Severe,
/// Score at or above the critical threshold.
Critical,
}
/// Alert emitted by the monitor for an agent whose drift level is above `Normal`.
#[derive(Debug, Clone)]
pub struct DriftAlert {
/// Name of the agent the alert concerns.
pub agent_name: String,
/// Drift score that triggered the alert.
pub drift_score: DriftScore,
/// Action recommended for the score's correction level.
pub recommended_action: CorrectionAction,
}
/// Action recommended in response to a drift level.
#[derive(Debug, Clone)]
pub enum CorrectionAction {
/// Log the contained message; recommended for `Normal` and `Minor` levels.
LogWarning(String),
/// Restart the agent; recommended for `Moderate` and `Severe` levels.
RestartAgent,
/// Pause the agent and escalate with the contained message; recommended for `Critical`.
PauseAndEscalate(String),
}
/// Raw per-agent counters gathered by the monitor between resets.
#[derive(Debug, Default)]
struct AgentMetricAccumulator {
/// Number of stdout, stderr and mention events observed.
total_lines: u64,
/// Number of stderr lines containing "error", "panic", "fatal" or "failed" (case-insensitive).
error_lines: u64,
/// Number of health checks recorded.
health_checks: u64,
/// Number of health checks whose status was `Healthy`.
healthy_checks: u64,
/// Sum of the reported costs, in USD.
total_cost_usd: f64,
/// Sum of reported input and output tokens.
total_tokens: u64,
/// Budget in cents; filled in by cost observations while still unset and kept across `reset`.
budget_cents: Option<u64>,
}
impl AgentMetricAccumulator {
fn drift_metrics(&self) -> DriftMetrics {
if self.total_lines == 0 && self.health_checks == 0 {
return DriftMetrics {
error_rate: 0.0,
command_success_rate: 1.0,
health_score: 1.0,
cost_efficiency: 0.0,
budget_exhaustion_rate: 0.0, sample_count: 0,
};
}
let error_rate = if self.total_lines > 0 {
self.error_lines as f64 / self.total_lines as f64
} else {
0.0
};
let command_success_rate = if self.total_lines > 0 {
1.0 - error_rate
} else {
1.0
};
let health_score = if self.health_checks > 0 {
self.healthy_checks as f64 / self.health_checks as f64
} else {
1.0
};
let cost_efficiency = if self.total_cost_usd > 0.0 {
self.total_tokens as f64 / self.total_cost_usd
} else {
0.0
};
let budget_exhaustion_rate = match self.budget_cents {
Some(cents) if cents > 0 => {
let spent_cents = (self.total_cost_usd * 100.0) as u64;
spent_cents as f64 / cents as f64
}
_ => 0.0, };
DriftMetrics {
error_rate,
command_success_rate,
health_score,
cost_efficiency,
budget_exhaustion_rate,
sample_count: self.total_lines + self.health_checks,
}
}
fn reset(&mut self) {
self.total_lines = 0;
self.error_lines = 0;
self.health_checks = 0;
self.healthy_checks = 0;
self.total_cost_usd = 0.0;
self.total_tokens = 0;
}
}
/// Collects per-agent observations, scores drift against the configuration, and
/// queues [`DriftAlert`]s on an internal channel.
pub struct NightwatchMonitor {
// Weights and thresholds used when scoring and classifying drift.
config: NightwatchConfig,
// Accumulated counters, keyed by agent name.
agent_metrics: HashMap<String, AgentMetricAccumulator>,
// Sending half of the alert channel; written to by `evaluate`.
alert_tx: mpsc::Sender<DriftAlert>,
// Receiving half of the alert channel; drained by `next_alert`.
alert_rx: mpsc::Receiver<DriftAlert>,
}
impl NightwatchMonitor {
pub fn new(config: NightwatchConfig) -> Self {
let (alert_tx, alert_rx) = mpsc::channel(64);
Self {
config,
agent_metrics: HashMap::new(),
alert_tx,
alert_rx,
}
}
pub fn observe(&mut self, agent_name: &str, event: &OutputEvent) {
let acc = self
.agent_metrics
.entry(agent_name.to_string())
.or_default();
match event {
OutputEvent::Stdout { .. } => {
acc.total_lines += 1;
}
OutputEvent::Stderr { line, .. } => {
acc.total_lines += 1;
let lower = line.to_lowercase();
if lower.contains("error")
|| lower.contains("panic")
|| lower.contains("fatal")
|| lower.contains("failed")
{
acc.error_lines += 1;
}
}
OutputEvent::Mention { .. } => {
acc.total_lines += 1;
}
OutputEvent::Completed { .. } => {}
}
}
pub fn observe_health(&mut self, agent_name: &str, status: HealthStatus) {
let acc = self
.agent_metrics
.entry(agent_name.to_string())
.or_default();
acc.health_checks += 1;
if status == HealthStatus::Healthy {
acc.healthy_checks += 1;
}
}
pub fn observe_cost(
&mut self,
agent_name: &str,
cost_usd: f64,
input_tokens: u64,
output_tokens: u64,
budget_cents: Option<u64>,
) {
let acc = self
.agent_metrics
.entry(agent_name.to_string())
.or_default();
acc.total_cost_usd += cost_usd;
acc.total_tokens += input_tokens + output_tokens;
if acc.budget_cents.is_none() {
acc.budget_cents = budget_cents;
}
}
pub async fn next_alert(&mut self) -> DriftAlert {
self.alert_rx
.recv()
.await
.expect("alert channel should never close while monitor exists")
}
pub fn evaluate(&mut self) {
let mut alerts = Vec::new();
for (name, acc) in &self.agent_metrics {
let metrics = acc.drift_metrics();
let score = self.calculate_drift(&metrics);
let level = self.classify_drift(score);
if level > CorrectionLevel::Normal {
let action = Self::recommended_action(level, name);
alerts.push(DriftAlert {
agent_name: name.clone(),
drift_score: DriftScore {
agent_name: name.clone(),
score,
metrics,
level,
},
recommended_action: action,
});
}
}
for alert in alerts {
let _ = self.alert_tx.try_send(alert);
}
}
pub fn drift_score(&self, agent_name: &str) -> Option<DriftScore> {
self.agent_metrics.get(agent_name).map(|acc| {
let metrics = acc.drift_metrics();
let score = self.calculate_drift(&metrics);
let level = self.classify_drift(score);
DriftScore {
agent_name: agent_name.to_string(),
score,
metrics,
level,
}
})
}
pub fn all_drift_scores(&self) -> Vec<DriftScore> {
self.agent_metrics
.iter()
.map(|(name, acc)| {
let metrics = acc.drift_metrics();
let score = self.calculate_drift(&metrics);
let level = self.classify_drift(score);
DriftScore {
agent_name: name.clone(),
score,
metrics,
level,
}
})
.collect()
}
pub fn reset(&mut self, agent_name: &str) {
if let Some(acc) = self.agent_metrics.get_mut(agent_name) {
acc.reset();
}
}
fn calculate_drift(&self, metrics: &DriftMetrics) -> f64 {
if metrics.sample_count == 0 {
return 0.0;
}
let error_weight = self.config.error_weight;
let success_weight = self.config.success_weight;
let health_weight = self.config.health_weight;
let budget_weight = self.config.budget_weight;
let error_drift = metrics.error_rate;
let success_drift = 1.0 - metrics.command_success_rate;
let health_drift = 1.0 - metrics.health_score;
let budget_drift = if metrics.budget_exhaustion_rate > 0.8 {
(metrics.budget_exhaustion_rate - 0.8) * 5.0 } else {
0.0
};
error_weight * error_drift
+ success_weight * success_drift
+ health_weight * health_drift
+ budget_weight * budget_drift
}
fn classify_drift(&self, score: f64) -> CorrectionLevel {
if score >= self.config.critical_threshold {
CorrectionLevel::Critical
} else if score >= self.config.severe_threshold {
CorrectionLevel::Severe
} else if score >= self.config.moderate_threshold {
CorrectionLevel::Moderate
} else if score >= self.config.minor_threshold {
CorrectionLevel::Minor
} else {
CorrectionLevel::Normal
}
}
fn recommended_action(level: CorrectionLevel, agent_name: &str) -> CorrectionAction {
match level {
CorrectionLevel::Normal => CorrectionAction::LogWarning("no action needed".to_string()),
CorrectionLevel::Minor => {
CorrectionAction::LogWarning(format!("minor drift detected for {}", agent_name))
}
CorrectionLevel::Moderate => CorrectionAction::RestartAgent,
CorrectionLevel::Severe => CorrectionAction::RestartAgent,
CorrectionLevel::Critical => CorrectionAction::PauseAndEscalate(format!(
"critical drift for {}, human intervention required",
agent_name
)),
}
}
}
/// Tracks provider call counts per agent within an hourly window.
#[derive(Debug, Clone, Default)]
pub struct RateLimitTracker {
/// Call windows keyed by `(agent_name, provider_id)`.
pub calls: HashMap<(String, String), RateLimitWindow>,
}
/// Call counter for one `(agent, provider)` pair.
#[derive(Debug, Clone)]
pub struct RateLimitWindow {
/// Calls recorded since `window_start`.
pub calls_this_hour: u32,
/// Maximum calls allowed per window; `None` means no limit is enforced.
pub hourly_limit: Option<u32>,
/// UTC time at which the current window began.
pub window_start: chrono::DateTime<chrono::Utc>,
}
impl RateLimitTracker {
pub fn record_call(&mut self, agent_name: &str, provider_id: &str) {
let key = (agent_name.to_string(), provider_id.to_string());
let window = self.calls.entry(key).or_insert_with(|| RateLimitWindow {
calls_this_hour: 0,
hourly_limit: None,
window_start: chrono::Utc::now(),
});
let elapsed = chrono::Utc::now() - window.window_start;
if elapsed.num_seconds() >= 3600 {
window.calls_this_hour = 0;
window.window_start = chrono::Utc::now();
}
window.calls_this_hour += 1;
}
pub fn can_call(&self, agent_name: &str, provider_id: &str) -> bool {
let key = (agent_name.to_string(), provider_id.to_string());
match self.calls.get(&key) {
Some(window) => match window.hourly_limit {
Some(limit) => window.calls_this_hour < limit,
None => true,
},
None => true,
}
}
pub fn update_limit(&mut self, agent_name: &str, provider_id: &str, limit: u32) {
let key = (agent_name.to_string(), provider_id.to_string());
if let Some(window) = self.calls.get_mut(&key) {
window.hourly_limit = Some(limit);
}
}
pub fn remaining(&self, agent_name: &str, provider_id: &str) -> Option<u32> {
let key = (agent_name.to_string(), provider_id.to_string());
self.calls.get(&key).and_then(|window| {
window
.hourly_limit
.map(|limit| limit.saturating_sub(window.calls_this_hour))
})
}
}
// Unit tests for drift scoring, rate limiting, certificate validation and the
// dual-panel evaluation. Drift-level expectations depend on `NightwatchConfig::default()`.
#[cfg(test)]
mod tests {
use super::*;
use terraphim_types::capability::ProcessId;
// Builds a stdout event carrying `line` with a fresh process id.
fn make_stdout(line: &str) -> OutputEvent {
OutputEvent::Stdout {
process_id: ProcessId::new(),
line: line.to_string(),
}
}
// Builds a stderr event carrying `line` with a fresh process id.
fn make_stderr(line: &str) -> OutputEvent {
OutputEvent::Stderr {
process_id: ProcessId::new(),
line: line.to_string(),
}
}
// An agent that was never observed has no drift score.
#[test]
fn test_drift_metrics_zero() {
let monitor = NightwatchMonitor::new(NightwatchConfig::default());
assert!(monitor.drift_score("nonexistent").is_none());
}
// 100 clean stdout lines plus one healthy check classify as Normal.
#[test]
fn test_drift_metrics_normal() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..100 {
monitor.observe("agent-a", &make_stdout("ok"));
}
monitor.observe_health("agent-a", HealthStatus::Healthy);
let ds = monitor.drift_score("agent-a").unwrap();
assert_eq!(ds.level, CorrectionLevel::Normal);
assert!(ds.score < 0.10);
}
// 15% error lines with a healthy agent.
// NOTE(review): the name says "minor" but the assertion pins Normal under the default config — confirm which is intended.
#[test]
fn test_drift_metrics_minor() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..85 {
monitor.observe("agent-b", &make_stdout("ok"));
}
for _ in 0..15 {
monitor.observe("agent-b", &make_stderr("error"));
}
monitor.observe_health("agent-b", HealthStatus::Healthy);
let ds = monitor.drift_score("agent-b").unwrap();
assert_eq!(ds.level, CorrectionLevel::Normal);
}
// 30% error lines with a healthy agent.
// NOTE(review): the name says "moderate" but the assertion pins Minor under the default config — confirm which is intended.
#[test]
fn test_drift_metrics_moderate() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..70 {
monitor.observe("agent-c", &make_stdout("ok"));
}
for _ in 0..30 {
monitor.observe("agent-c", &make_stderr("error"));
}
monitor.observe_health("agent-c", HealthStatus::Healthy);
let ds = monitor.drift_score("agent-c").unwrap();
assert_eq!(ds.level, CorrectionLevel::Minor);
}
// 60% error lines and half of the health checks degraded classify as Severe.
#[test]
fn test_drift_metrics_severe() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..40 {
monitor.observe("agent-d", &make_stdout("ok"));
}
for _ in 0..60 {
monitor.observe("agent-d", &make_stderr("error"));
}
for _ in 0..5 {
monitor.observe_health("agent-d", HealthStatus::Healthy);
}
for _ in 0..5 {
monitor.observe_health("agent-d", HealthStatus::Degraded);
}
let ds = monitor.drift_score("agent-d").unwrap();
assert_eq!(ds.level, CorrectionLevel::Severe);
}
// 90% error lines and 8 of 10 health checks unhealthy classify as Critical.
#[test]
fn test_drift_metrics_critical() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..10 {
monitor.observe("agent-e", &make_stdout("ok"));
}
for _ in 0..90 {
monitor.observe("agent-e", &make_stderr("error"));
}
for _ in 0..8 {
monitor.observe_health("agent-e", HealthStatus::Unhealthy);
}
for _ in 0..2 {
monitor.observe_health("agent-e", HealthStatus::Healthy);
}
let ds = monitor.drift_score("agent-e").unwrap();
assert_eq!(ds.level, CorrectionLevel::Critical);
}
// Resetting an agent clears its samples, so its score drops back to zero.
#[test]
fn test_drift_reset() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..50 {
monitor.observe("agent-f", &make_stderr("error"));
}
let ds = monitor.drift_score("agent-f").unwrap();
assert!(ds.score > 0.5);
monitor.reset("agent-f");
let ds = monitor.drift_score("agent-f").unwrap();
assert!(ds.score < f64::EPSILON);
assert_eq!(ds.metrics.sample_count, 0);
}
// Stderr output without any failure keyword must not count as an error.
#[test]
fn test_stderr_without_error_keywords_not_counted() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..100 {
monitor.observe("agent-bun", &make_stderr("bun install v1.2.3"));
}
monitor.observe_health("agent-bun", HealthStatus::Healthy);
let ds = monitor.drift_score("agent-bun").unwrap();
assert_eq!(ds.metrics.error_rate, 0.0);
assert_eq!(ds.level, CorrectionLevel::Normal);
}
// Stderr lines containing "fatal" count as errors: 50 of 100 lines gives a rate of 0.5.
#[test]
fn test_stderr_with_error_keywords_counted() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..50 {
monitor.observe("agent-err", &make_stdout("ok"));
}
for _ in 0..50 {
monitor.observe("agent-err", &make_stderr("fatal: connection refused"));
}
monitor.observe_health("agent-err", HealthStatus::Healthy);
let ds = monitor.drift_score("agent-err").unwrap();
assert_eq!(ds.metrics.error_rate, 0.5);
assert!(ds.level >= CorrectionLevel::Moderate);
}
// The derived ordering must run from Normal up to Critical.
#[test]
fn test_correction_level_ordering() {
assert!(CorrectionLevel::Normal < CorrectionLevel::Minor);
assert!(CorrectionLevel::Minor < CorrectionLevel::Moderate);
assert!(CorrectionLevel::Moderate < CorrectionLevel::Severe);
assert!(CorrectionLevel::Severe < CorrectionLevel::Critical);
}
// An unknown pair is callable with no remaining count; one call against a limit of 100 leaves 99.
#[test]
fn test_rate_limit_tracker_basic() {
let mut tracker = RateLimitTracker::default();
assert!(tracker.can_call("agent-a", "openai"));
assert!(tracker.remaining("agent-a", "openai").is_none());
tracker.record_call("agent-a", "openai");
tracker.update_limit("agent-a", "openai", 100);
assert!(tracker.can_call("agent-a", "openai"));
assert_eq!(tracker.remaining("agent-a", "openai"), Some(99));
}
// Two calls against a limit of 2 exhaust the window.
#[test]
fn test_rate_limit_tracker_exhausted() {
let mut tracker = RateLimitTracker::default();
tracker.record_call("agent-b", "anthropic");
tracker.update_limit("agent-b", "anthropic", 2);
tracker.record_call("agent-b", "anthropic");
assert!(!tracker.can_call("agent-b", "anthropic"));
assert_eq!(tracker.remaining("agent-b", "anthropic"), Some(0));
}
// `evaluate` must queue an alert for an agent with 90% error lines.
#[test]
fn test_evaluate_emits_alerts() {
let mut monitor = NightwatchMonitor::new(NightwatchConfig::default());
for _ in 0..90 {
monitor.observe("bad-agent", &make_stderr("error"));
}
for _ in 0..10 {
monitor.observe("bad-agent", &make_stdout("ok"));
}
monitor.evaluate();
match monitor.alert_rx.try_recv() {
Ok(alert) => {
assert_eq!(alert.agent_name, "bad-agent");
assert!(alert.drift_score.level >= CorrectionLevel::Moderate);
}
Err(_) => panic!("expected alert from evaluate"),
}
}
// Two premises, one claim, a conclusion and positive confidence is the minimum valid certificate.
#[test]
fn test_reasoning_certificate_valid() {
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: Some("test".to_string()),
}],
edge_cases: vec![],
formal_conclusion: "conclusion".to_string(),
confidence: 0.95,
};
assert!(validate_certificate(&cert));
}
// A single premise is not enough.
#[test]
fn test_reasoning_certificate_insufficient_premises() {
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string()],
claims: vec![Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: None,
}],
edge_cases: vec![],
formal_conclusion: "conclusion".to_string(),
confidence: 0.95,
};
assert!(!validate_certificate(&cert));
}
// A certificate without claims is invalid.
#[test]
fn test_reasoning_certificate_no_claims() {
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![],
edge_cases: vec![],
formal_conclusion: "conclusion".to_string(),
confidence: 0.95,
};
assert!(!validate_certificate(&cert));
}
// An empty formal conclusion is invalid.
#[test]
fn test_reasoning_certificate_empty_conclusion() {
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: None,
}],
edge_cases: vec![],
formal_conclusion: "".to_string(),
confidence: 0.95,
};
assert!(!validate_certificate(&cert));
}
// Confidence must be strictly positive.
#[test]
fn test_reasoning_certificate_zero_confidence() {
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: None,
}],
edge_cases: vec![],
formal_conclusion: "conclusion".to_string(),
confidence: 0.0,
};
assert!(!validate_certificate(&cert));
}
// The default certificate is empty and therefore invalid.
#[test]
fn test_reasoning_certificate_default_invalid() {
let cert = ReasoningCertificate::default();
assert!(!validate_certificate(&cert));
}
// Edge cases and multiple claims do not affect validity.
#[test]
fn test_reasoning_certificate_with_edge_cases() {
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![
Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: Some("dimension1".to_string()),
},
Claim {
claim: "claim2".to_string(),
evidence: "evidence2".to_string(),
dimension: Some("dimension2".to_string()),
},
],
edge_cases: vec!["edge1".to_string(), "edge2".to_string()],
formal_conclusion: "formal conclusion".to_string(),
confidence: 0.85,
};
assert!(validate_certificate(&cert));
assert_eq!(cert.premises.len(), 2);
assert_eq!(cert.claims.len(), 2);
assert_eq!(cert.edge_cases.len(), 2);
}
// A claim can be built without a dimension.
#[test]
fn test_claim_without_dimension() {
let claim = Claim {
claim: "test claim".to_string(),
evidence: "test evidence".to_string(),
dimension: None,
};
assert_eq!(claim.claim, "test claim");
assert_eq!(claim.evidence, "test evidence");
assert!(claim.dimension.is_none());
}
// A strong certificate and a well-structured output both score high, so no drift is flagged.
#[test]
fn test_dual_panel_both_agree_no_drift() {
let output = r#"## Analysis
This is a well-structured output with evidence.
## Evidence
The data shows X because of Y.
## Conclusion
Therefore, we should proceed with Z."#;
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![
Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: Some("test".to_string()),
},
Claim {
claim: "claim2".to_string(),
evidence: "evidence2".to_string(),
dimension: Some("test2".to_string()),
},
],
edge_cases: vec!["edge1".to_string()],
formal_conclusion: "conclusion".to_string(),
confidence: 0.95,
};
let result = dual_panel_evaluate(output, Some(&cert));
assert!(
result.panel_a_score > 0.5,
"Panel A should score high with valid cert"
);
assert!(
result.panel_b_score > 0.5,
"Panel B should score high with structured output"
);
assert!(result.agreement >= 0.5, "Panels should agree");
assert!(
!result.drift_detected,
"No drift should be detected when panels agree"
);
}
// A valid certificate paired with an unstructured output makes the panels disagree.
#[test]
fn test_dual_panel_disagree_drift_detected() {
let output = "short";
let cert = ReasoningCertificate {
premises: vec!["premise1".to_string(), "premise2".to_string()],
claims: vec![Claim {
claim: "claim1".to_string(),
evidence: "evidence1".to_string(),
dimension: None,
}],
edge_cases: vec![],
formal_conclusion: "conclusion".to_string(),
confidence: 0.95,
};
let result = dual_panel_evaluate(output, Some(&cert));
assert!(result.panel_a_score > 0.0, "Panel A should have some score");
assert!(
result.panel_b_score < 0.5,
"Panel B should score low with unstructured output"
);
assert!(result.agreement < 0.5, "Panels should disagree");
assert!(
result.drift_detected,
"Drift should be detected when panels disagree"
);
}
// Without a certificate panel A scores 0, so a structured output is flagged as drift.
#[test]
fn test_dual_panel_missing_certificate() {
let output = r#"## Analysis
This output has structure but no certificate.
## Evidence
Because of reasons.
## Conclusion
Therefore, success."#;
let result = dual_panel_evaluate(output, None);
assert_eq!(
result.panel_a_score, 0.0,
"Panel A should be 0 when no certificate"
);
assert!(
result.panel_b_score > 0.5,
"Panel B should score high with structured output"
);
assert!(
result.drift_detected,
"Drift should be detected when certificate is missing"
);
}
// Two equally poor panels agree perfectly, so no drift is flagged.
#[test]
fn test_dual_panel_both_poor_no_drift() {
let output = "x";
let cert = ReasoningCertificate {
premises: vec!["only_one".to_string()], claims: vec![],
edge_cases: vec![],
formal_conclusion: "".to_string(),
confidence: 0.0,
};
let result = dual_panel_evaluate(output, Some(&cert));
assert_eq!(
result.panel_a_score, 0.0,
"Panel A should be 0 with invalid cert"
);
assert_eq!(
result.panel_b_score, 0.0,
"Panel B should be 0 with no structure"
);
assert_eq!(
result.agreement, 1.0,
"Agreement should be perfect when both score 0"
);
assert!(
!result.drift_detected,
"No drift when both panels agree (even if low)"
);
}
// `DualPanelResult` survives a JSON round trip.
#[test]
fn test_dual_panel_result_serialization() {
let result = DualPanelResult {
panel_a_score: 0.9,
panel_b_score: 0.8,
agreement: 0.9,
drift_detected: false,
details: "Test details".to_string(),
};
let json = serde_json::to_string(&result).unwrap();
assert!(json.contains("panel_a_score"));
assert!(json.contains("0.9"));
assert!(json.contains("drift_detected"));
let deserialized: DualPanelResult = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.panel_a_score, 0.9);
assert_eq!(deserialized.panel_b_score, 0.8);
assert_eq!(deserialized.agreement, 0.9);
assert!(!deserialized.drift_detected);
}
// A minimal valid certificate scores the 0.5 base; one earning every bonus scores 1.0.
#[test]
fn test_calculate_certificate_score_components() {
let minimal = ReasoningCertificate {
premises: vec!["p1".to_string(), "p2".to_string()],
claims: vec![Claim {
claim: "c1".to_string(),
evidence: "e1".to_string(),
dimension: None,
}],
edge_cases: vec![],
formal_conclusion: "conclusion".to_string(),
confidence: 0.5,
};
assert_eq!(calculate_certificate_score(&minimal), 0.5);
let full = ReasoningCertificate {
premises: vec!["p1".to_string(), "p2".to_string(), "p3".to_string()],
claims: vec![
Claim {
claim: "c1".to_string(),
evidence: "e1".to_string(),
dimension: None,
},
Claim {
claim: "c2".to_string(),
evidence: "e2".to_string(),
dimension: None,
},
],
edge_cases: vec!["edge".to_string()],
formal_conclusion: "conclusion".to_string(),
confidence: 0.95,
};
assert_eq!(calculate_certificate_score(&full), 1.0);
}
// Each structural component contributes on its own; all four together reach 1.0 within rounding.
#[test]
fn test_calculate_structure_score_components() {
assert_eq!(calculate_structure_score(""), 0.0);
assert_eq!(calculate_structure_score("x".repeat(100).as_str()), 0.1);
assert!(calculate_structure_score("## Section") >= 0.3);
assert!(calculate_structure_score("evidence: because") >= 0.3);
assert!(calculate_structure_score("conclusion: therefore") >= 0.3);
let full = "## Analysis\n\nevidence: X is supported by the data\n\nconclusion: Therefore we should proceed with Y. This is the final conclusion of this analysis.";
assert!((calculate_structure_score(full) - 1.0).abs() < f64::EPSILON);
}
}