use brainwires_core::ChatResponse;
/// Heuristic confidence estimate for a single chat response.
#[derive(Debug, Clone, Default)]
pub struct ResponseConfidence {
    /// Overall score; `extract_confidence` produces values clamped to `[0.0, 1.0]`.
    pub score: f64,
    /// The individual factor scores the overall score was blended from.
    pub factors: ConfidenceFactors,
}

impl ResponseConfidence {
    /// Scores at or above this value count as high confidence.
    const HIGH_THRESHOLD: f64 = 0.8;
    /// Scores strictly below this value count as low confidence.
    const LOW_THRESHOLD: f64 = 0.6;
    /// Inclusive lower bound of each named level, highest first.
    const LEVEL_FLOORS: [(f64, &'static str); 4] = [
        (0.9, "very_high"),
        (0.8, "high"),
        (0.6, "medium"),
        (0.4, "low"),
    ];

    /// Returns `true` when the score is at least 0.8.
    pub fn is_high_confidence(&self) -> bool {
        self.score >= Self::HIGH_THRESHOLD
    }

    /// Returns `true` when the score is strictly below 0.6.
    pub fn is_low_confidence(&self) -> bool {
        self.score < Self::LOW_THRESHOLD
    }

    /// Maps the score to a coarse label.
    ///
    /// The label is the first of `"very_high"` (>= 0.9), `"high"` (>= 0.8),
    /// `"medium"` (>= 0.6) and `"low"` (>= 0.4) whose bound the score reaches;
    /// anything else, including NaN, yields `"very_low"`.
    pub fn level(&self) -> &'static str {
        Self::LEVEL_FLOORS
            .iter()
            .find(|&&(floor, _)| self.score >= floor)
            .map_or("very_low", |&(_, label)| label)
    }
}
/// The individual signals that are blended into an overall confidence score.
#[derive(Debug, Clone, Default)]
pub struct ConfidenceFactors {
    /// Score attributed to how the response finished.
    pub completion_confidence: f64,
    /// Score attributed to hedging, self-correcting, or assertive phrasing.
    pub pattern_confidence: f64,
    /// Score attributed to the response length.
    pub length_confidence: f64,
    /// Score attributed to the shape of the response content.
    pub structure_confidence: f64,
}

impl ConfidenceFactors {
    /// Returns the label and value of the lowest-scoring factor.
    ///
    /// Labels are `"completion"`, `"pattern"`, `"length"` and `"structure"`.
    /// When several factors share the lowest value, the earliest one in that
    /// order is returned.
    pub fn weakest_factor(&self) -> (&'static str, f64) {
        let candidates = [
            ("completion", self.completion_confidence),
            ("pattern", self.pattern_confidence),
            ("length", self.length_confidence),
            ("structure", self.structure_confidence),
        ];

        let mut weakest = candidates[0];
        for &candidate in &candidates[1..] {
            // Strict `<` keeps the earlier entry on ties and on incomparable (NaN) values.
            if candidate.1 < weakest.1 {
                weakest = candidate;
            }
        }
        weakest
    }
}
/// Hedging phrases that lower the pattern confidence of a response.
///
/// `calculate_pattern_confidence` looks for each entry as a substring of the
/// lowercased response text, so entries must be written in lowercase. Each
/// distinct phrase that occurs counts once, however often it is repeated.
const LOW_CONFIDENCE_PATTERNS: &[&str] = &[
"i'm not sure",
"i think",
"possibly",
"might be",
"could be",
"i believe",
"probably",
"perhaps",
"maybe",
"not certain",
"unclear",
"i guess",
"it seems",
"apparently",
];
/// Phrases with which a response corrects itself; they lower the pattern
/// confidence more strongly per match than hedging phrases do.
///
/// `calculate_pattern_confidence` looks for each entry as a substring of the
/// lowercased response text, so entries must be written in lowercase. Several
/// entries include their trailing punctuation (`"wait,"`, `"correction:"`),
/// which is part of the match.
const SELF_CORRECTION_PATTERNS: &[&str] = &[
"wait,",
"actually,",
"let me reconsider",
"i made a mistake",
"correction:",
"i was wrong",
"on second thought",
"i need to revise",
"let me correct",
"that's not right",
];
/// Assertive phrases that raise the pattern confidence of a response.
///
/// `calculate_pattern_confidence` looks for each entry as a substring of the
/// lowercased response text, so entries must be written in lowercase. Each
/// distinct phrase that occurs counts once.
const HIGH_CONFIDENCE_PATTERNS: &[&str] = &[
"the answer is",
"definitely",
"certainly",
"clearly",
"without doubt",
"the solution is",
"this will work",
"i can confirm",
];
/// Computes a heuristic confidence estimate for `response`.
///
/// Four factor scores are derived from the finish reason, the wording of the
/// response text, its length, and the shape of its content. They are blended
/// with fixed weights (completion 0.30, pattern 0.35, length 0.15,
/// structure 0.20) and the blend is clamped to `[0.0, 1.0]`.
///
/// The returned value carries both the blended score and the raw factors.
pub fn extract_confidence(response: &ChatResponse) -> ResponseConfidence {
    const COMPLETION_WEIGHT: f64 = 0.30;
    const PATTERN_WEIGHT: f64 = 0.35;
    const LENGTH_WEIGHT: f64 = 0.15;
    const STRUCTURE_WEIGHT: f64 = 0.20;

    let text = get_response_text(response);
    let factors = ConfidenceFactors {
        completion_confidence: calculate_completion_confidence(&response.finish_reason),
        pattern_confidence: calculate_pattern_confidence(&text),
        length_confidence: calculate_length_confidence(&text),
        structure_confidence: calculate_structure_confidence(response),
    };

    let weighted_sum = factors.completion_confidence * COMPLETION_WEIGHT
        + factors.pattern_confidence * PATTERN_WEIGHT
        + factors.length_confidence * LENGTH_WEIGHT
        + factors.structure_confidence * STRUCTURE_WEIGHT;

    ResponseConfidence {
        score: weighted_sum.clamp(0.0, 1.0),
        factors,
    }
}
/// Flattens the message content of `response` into a single string.
///
/// Plain-text content is returned as a copy. For block content, the text of
/// every `Text` block is joined with a single space in block order; all other
/// block kinds are skipped.
fn get_response_text(response: &ChatResponse) -> String {
    use brainwires_core::{ContentBlock, MessageContent};

    let blocks = match &response.message.content {
        MessageContent::Text(text) => return text.clone(),
        MessageContent::Blocks(blocks) => blocks,
    };

    let text_parts: Vec<&str> = blocks
        .iter()
        .filter_map(|block| match block {
            ContentBlock::Text { text } => Some(text.as_str()),
            _ => None,
        })
        .collect();
    text_parts.join(" ")
}
/// Scores a response by the reason it stopped generating.
///
/// | finish reason                | score |
/// |------------------------------|-------|
/// | `"stop"`, `"end_turn"`       | 0.95  |
/// | `"tool_use"`                 | 0.90  |
/// | `"length"`, `"max_tokens"`   | 0.50  |
/// | `"content_filter"`           | 0.30  |
/// | missing (`None`)             | 0.70  |
/// | any other string             | 0.60  |
///
/// Matching is exact and case-sensitive.
fn calculate_completion_confidence(finish_reason: &Option<String>) -> f64 {
    let reason = match finish_reason {
        Some(reason) => reason.as_str(),
        None => return 0.70,
    };

    match reason {
        "stop" | "end_turn" => 0.95,
        "tool_use" => 0.90,
        "length" | "max_tokens" => 0.50,
        "content_filter" => 0.30,
        _ => 0.60,
    }
}
fn calculate_pattern_confidence(text: &str) -> f64 {
let text_lower = text.to_lowercase();
let low_confidence_count = LOW_CONFIDENCE_PATTERNS
.iter()
.filter(|p| text_lower.contains(*p))
.count();
let self_correction_count = SELF_CORRECTION_PATTERNS
.iter()
.filter(|p| text_lower.contains(*p))
.count();
let high_confidence_count = HIGH_CONFIDENCE_PATTERNS
.iter()
.filter(|p| text_lower.contains(*p))
.count();
let mut confidence = 0.75;
confidence -= (low_confidence_count as f64 * 0.08).min(0.35);
confidence -= (self_correction_count as f64 * 0.15).min(0.30);
confidence += (high_confidence_count as f64 * 0.05).min(0.15);
confidence.clamp(0.25, 0.98)
}
/// Scores a response by its approximate length.
///
/// The token count is estimated as the UTF-8 byte length of `text` divided by
/// four (integer division). Mid-sized responses score highest; very short and
/// very long ones score lower:
///
/// | estimated tokens | score |
/// |------------------|-------|
/// | 0 - 9            | 0.40  |
/// | 10 - 29          | 0.60  |
/// | 30 - 49          | 0.75  |
/// | 50 - 500         | 0.90  |
/// | 501 - 1000       | 0.75  |
/// | 1001 - 2000      | 0.60  |
/// | above 2000       | 0.50  |
fn calculate_length_confidence(text: &str) -> f64 {
    match text.len() / 4 {
        0..=9 => 0.40,
        10..=29 => 0.60,
        30..=49 => 0.75,
        50..=500 => 0.90,
        501..=1000 => 0.75,
        1001..=2000 => 0.60,
        _ => 0.50,
    }
}
/// Scores a response by the shape of its message content.
///
/// Plain-text content scores 0.70. Block content scores 0.90 when at least
/// one block is a `ToolUse` block and 0.75 otherwise (including when there
/// are no blocks at all).
fn calculate_structure_confidence(response: &ChatResponse) -> f64 {
    use brainwires_core::{ContentBlock, MessageContent};

    match &response.message.content {
        MessageContent::Text(_) => 0.70,
        MessageContent::Blocks(blocks)
            if blocks
                .iter()
                .any(|block| matches!(block, ContentBlock::ToolUse { .. })) =>
        {
            0.90
        }
        MessageContent::Blocks(_) => 0.75,
    }
}
/// Cheap pass/fail confidence screen that avoids the full factor computation.
///
/// Returns `false` when the response was cut off by a token limit (finish
/// reason `"length"` or `"max_tokens"`) or when its lowercased text contains
/// one of a handful of unmistakable low-confidence phrases. Returns `true`
/// otherwise, including when no finish reason is present.
pub fn quick_confidence_check(response: &ChatResponse) -> bool {
    // Lowercase because they are matched against the lowercased response text.
    const OBVIOUS_LOW_CONFIDENCE: [&str; 5] = [
        "i'm not sure",
        "i don't know",
        "i cannot",
        "i made a mistake",
        "that's not right",
    ];

    // `calculate_completion_confidence` scores "length" and "max_tokens" as the
    // same condition, so both must fail here; checking only "length" let
    // responses cut off with "max_tokens" pass the screen.
    if matches!(
        response.finish_reason.as_deref(),
        Some("length") | Some("max_tokens")
    ) {
        return false;
    }

    let text_lower = get_response_text(response).to_lowercase();
    !OBVIOUS_LOW_CONFIDENCE
        .iter()
        .any(|phrase| text_lower.contains(*phrase))
}
#[cfg(test)]
mod tests {
    use super::*;
    use brainwires_core::{Message, MessageContent, Usage};

    /// Builds an assistant `ChatResponse` with plain-text content and the
    /// given finish reason.
    fn make_response(text: &str, finish_reason: Option<&str>) -> ChatResponse {
        ChatResponse {
            message: Message {
                role: brainwires_core::Role::Assistant,
                content: MessageContent::Text(text.to_string()),
                name: None,
                metadata: None,
            },
            usage: Usage::default(),
            finish_reason: finish_reason.map(String::from),
        }
    }

    /// An assertive, cleanly finished answer is rated high confidence.
    #[test]
    fn test_high_confidence_response() {
        let response = make_response(
            "The solution is to use a hashmap for O(1) lookup. This will definitely work.",
            Some("stop"),
        );
        let confidence = extract_confidence(&response);
        assert!(
            confidence.score > 0.75,
            "Expected score above 0.75, got {}",
            confidence.score
        );
        // The previous check here (`is_high_confidence() || score >= 0.7`) was
        // implied by the assertion above and could never fail. With two
        // assertive phrases and a "stop" finish the score is about 0.81, so
        // the high-confidence predicate itself can be asserted.
        assert!(
            confidence.is_high_confidence(),
            "Expected high confidence, got {}",
            confidence.score
        );
    }

    /// Hedging plus a self-correction pulls both the pattern factor and the
    /// overall score down.
    #[test]
    fn test_low_confidence_response() {
        let response = make_response(
            "I'm not sure, but I think maybe this could possibly work. Let me reconsider...",
            Some("stop"),
        );
        let confidence = extract_confidence(&response);
        assert!(
            confidence.score < 0.75,
            "Expected low confidence score, got {}",
            confidence.score
        );
        assert!(confidence.factors.pattern_confidence < 0.7);
    }

    /// A response that stopped with "length" gets a low completion factor.
    #[test]
    fn test_truncated_response() {
        let response = make_response(
            "The answer involves several steps. First, we need to",
            Some("length"),
        );
        let confidence = extract_confidence(&response);
        assert!(confidence.factors.completion_confidence < 0.6);
    }

    /// A one-word answer gets a low length factor.
    #[test]
    fn test_very_short_response() {
        let response = make_response("Yes", Some("stop"));
        let confidence = extract_confidence(&response);
        assert!(confidence.factors.length_confidence < 0.7);
    }

    /// Assertive wording scores above hedged wording.
    #[test]
    fn test_pattern_confidence_calculation() {
        let high = calculate_pattern_confidence(
            "The solution is definitely correct and will certainly work.",
        );
        assert!(high > 0.7, "Expected pattern score above 0.7, got {}", high);

        let low =
            calculate_pattern_confidence("I'm not sure, but maybe it could possibly work perhaps.");
        assert!(low < 0.6, "Expected pattern score below 0.6, got {}", low);
    }

    /// The quick check passes a plain answer and rejects an "I don't know".
    #[test]
    fn test_quick_confidence_check() {
        let good = make_response("Here is the implementation you need.", Some("stop"));
        assert!(quick_confidence_check(&good));

        let bad = make_response("I don't know how to do this.", Some("stop"));
        assert!(!quick_confidence_check(&bad));
    }

    /// `level()` maps scores to labels, with inclusive lower bounds.
    #[test]
    fn test_confidence_level() {
        let at = |score: f64| ResponseConfidence {
            score,
            ..Default::default()
        };

        assert_eq!(at(0.9).level(), "very_high");
        assert_eq!(at(0.3).level(), "very_low");

        // Each band starts exactly at its lower bound.
        assert_eq!(at(0.8).level(), "high");
        assert_eq!(at(0.6).level(), "medium");
        assert_eq!(at(0.4).level(), "low");
    }
}