1use super::reflective_agent::ExecutionContext;
32use serde::{Deserialize, Serialize};
33use std::collections::HashMap;
34
/// Tunable settings for confidence scoring and the revision decision.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceConfig {
    /// A revision is requested only while the computed confidence is strictly below this value.
    pub threshold: f32,
    /// Number of previous attempts at which `ConfidenceChecker::should_revise` stops
    /// asking for another revision.
    pub revision_budget: u32,
    /// NOTE(review): not read anywhere in this file; presumably the minimum score gain
    /// a revision must achieve to count as an improvement — confirm against callers.
    pub min_improvement: f32,
    /// Per-factor multipliers applied when combining the factor scores.
    pub factor_weights: ConfidenceFactorWeights,
    /// When `false`, the structure factor is a fixed 0.8 instead of being derived from the output.
    pub use_structural_analysis: bool,
    /// Phrases matched as case-insensitive substrings; each distinct hit lowers the
    /// certainty factor and is reported as an uncertainty weak point.
    pub low_confidence_patterns: Vec<String>,
}
51
52impl Default for ConfidenceConfig {
53 fn default() -> Self {
54 Self {
55 threshold: 0.7,
56 revision_budget: 3,
57 min_improvement: 0.05,
58 factor_weights: ConfidenceFactorWeights::default(),
59 use_structural_analysis: true,
60 low_confidence_patterns: vec![
61 "I'm not sure".to_string(),
62 "might be".to_string(),
63 "possibly".to_string(),
64 "could be wrong".to_string(),
65 "uncertain".to_string(),
66 "TODO".to_string(),
67 "FIXME".to_string(),
68 "not implemented".to_string(),
69 ],
70 }
71 }
72}
73
/// Multipliers applied to each factor score when `ConfidenceChecker::compute_confidence`
/// sums them into one value.
///
/// The weights are not normalised by the code; the summed score is clamped to `[0.0, 1.0]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceFactorWeights {
    /// Weight of the completeness factor.
    pub completeness: f32,
    /// Weight of the structure factor.
    pub structure: f32,
    /// Weight of the certainty factor.
    pub certainty: f32,
    /// Weight of the relevance factor.
    pub relevance: f32,
    /// Weight of the code-validity factor.
    pub code_validity: f32,
}
88
89impl Default for ConfidenceFactorWeights {
90 fn default() -> Self {
91 Self {
92 completeness: 0.25,
93 structure: 0.20,
94 certainty: 0.20,
95 relevance: 0.20,
96 code_validity: 0.15,
97 }
98 }
99}
100
/// Coarse bucket for a numeric confidence score (see `ConfidenceLevel::from_score`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ConfidenceLevel {
    /// Score strictly above 0.9.
    VeryHigh,
    /// Score strictly above 0.7, up to and including 0.9.
    High,
    /// Score strictly above 0.5, up to and including 0.7.
    Medium,
    /// Score strictly above 0.3, up to and including 0.5.
    Low,
    /// Any other score: 0.3 or below, or NaN (every comparison fails).
    VeryLow,
}
115
116impl ConfidenceLevel {
117 pub fn from_score(score: f32) -> Self {
119 match score {
120 s if s > 0.9 => Self::VeryHigh,
121 s if s > 0.7 => Self::High,
122 s if s > 0.5 => Self::Medium,
123 s if s > 0.3 => Self::Low,
124 _ => Self::VeryLow,
125 }
126 }
127
128 pub fn as_str(&self) -> &'static str {
130 match self {
131 Self::VeryHigh => "very_high",
132 Self::High => "high",
133 Self::Medium => "medium",
134 Self::Low => "low",
135 Self::VeryLow => "very_low",
136 }
137 }
138
139 pub fn should_revise(&self) -> bool {
141 matches!(self, Self::Low | Self::VeryLow)
142 }
143}
144
/// A single problem found in an output, with a hint on how to fix it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WeakPoint {
    /// Free-form location label (this file uses values such as "line 3" or "multiple locations").
    pub location: String,
    /// Human-readable explanation of the problem.
    pub description: String,
    /// Severity; `WeakPoint::new` clamps it to `[0.0, 1.0]`.
    pub severity: f32,
    /// Category of the problem.
    pub weakness_type: WeaknessType,
    /// Suggested remedy; empty until `with_suggestion` is called.
    pub suggestion: String,
    /// Set to 0.8 by `WeakPoint::new`.
    /// NOTE(review): presumably how sure the detector is about this finding — confirm.
    pub confidence: f32,
}
161
162impl WeakPoint {
163 pub fn new(
165 location: impl Into<String>,
166 description: impl Into<String>,
167 severity: f32,
168 weakness_type: WeaknessType,
169 ) -> Self {
170 Self {
171 location: location.into(),
172 description: description.into(),
173 severity: severity.clamp(0.0, 1.0),
174 weakness_type,
175 suggestion: String::new(),
176 confidence: 0.8,
177 }
178 }
179
180 pub fn with_suggestion(mut self, suggestion: impl Into<String>) -> Self {
182 self.suggestion = suggestion.into();
183 self
184 }
185}
186
/// Category attached to a `WeakPoint`.
///
/// Only `Incomplete`, `Uncertainty`, `MissingErrorHandling`, `MissingValidation` and
/// `MissingTests` are produced by the checks in this file; the remaining variants are
/// not emitted here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum WeaknessType {
    /// Emitted when the output contains `TODO`, `FIXME`, `XXX` or `HACK`.
    Incomplete,
    /// Emitted when a configured low-confidence pattern is found in the output.
    Uncertainty,
    /// Emitted when the output contains `fn ` but none of `Result<`, `Option<` or `?`.
    MissingErrorHandling,
    /// Emitted when the task mentions "input" or "parameter" but the output never
    /// mentions "valid", "check" or "assert".
    MissingValidation,
    /// Not produced by any check in this file.
    CodeSmell,
    /// Emitted when the task mentions "test" but the output has neither `#[test]` nor `fn test_`.
    MissingTests,
    /// Not produced by any check in this file.
    DocumentationGap,
    /// Not produced by any check in this file.
    SecurityConcern,
    /// Not produced by any check in this file.
    PerformanceIssue,
    /// Not produced by any check in this file.
    LogicError,
    /// Not produced by any check in this file.
    Other,
}
213
/// Summary of a revision pass.
///
/// NOTE(review): nothing in this file constructs or reads this type, so the field notes
/// below are inferred from the field names only — confirm against the code that fills it in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RevisionResult {
    /// Presumably the confidence score before revising.
    pub original_confidence: f32,
    /// Presumably the confidence score after revising.
    pub new_confidence: f32,
    /// Presumably the change between the two scores — TODO confirm sign convention.
    pub improvement: f32,
    /// Presumably the weak points the revision resolved.
    pub addressed_weak_points: Vec<WeakPoint>,
    /// Presumably the weak points still present after the revision.
    pub remaining_weak_points: Vec<WeakPoint>,
    /// Presumably how many revisions were performed.
    pub revision_count: u32,
    /// Presumably whether the revision is considered a success — criteria not visible here.
    pub successful: bool,
}
232
/// Heuristic scorer that rates an output's confidence, lists its weak points and
/// keeps a history of the checks it has recorded.
#[derive(Debug)]
pub struct ConfidenceChecker {
    /// Thresholds, weights and patterns used by every check.
    config: ConfidenceConfig,
    /// One record per `record_check` call, oldest first.
    check_history: Vec<ConfidenceCheckRecord>,
    /// Extra patterns added through `learn_pattern`, mapped to a penalty weight in `[0.0, 1.0]`;
    /// each case-insensitive match multiplies the score by `1.0 - weight`.
    learned_patterns: HashMap<String, f32>,
}
243
/// Snapshot of one confidence check, as produced by `ConfidenceChecker::record_check`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceCheckRecord {
    /// Overall confidence score, clamped to `[0.0, 1.0]`.
    pub score: f32,
    /// Level derived from `score`.
    pub level: ConfidenceLevel,
    /// Weak points identified in the checked output.
    pub weak_points: Vec<WeakPoint>,
    /// Unweighted factor scores keyed by "completeness", "structure", "certainty",
    /// "relevance" and "code_validity".
    pub factors: HashMap<String, f32>,
    /// First 100 characters of the task text.
    pub task_summary: String,
    /// Seconds since the Unix epoch at recording time; 0 if the system clock is before the epoch.
    pub timestamp: u64,
}
260
261impl ConfidenceChecker {
262 pub fn new(config: ConfidenceConfig) -> Self {
264 Self {
265 config,
266 check_history: Vec::new(),
267 learned_patterns: HashMap::new(),
268 }
269 }
270
271 pub fn should_revise(&self, output: &str, context: &ExecutionContext) -> bool {
273 let confidence = self.compute_confidence(output, context);
274 let attempts = context.previous_attempts.len() as u32;
275
276 confidence < self.config.threshold && attempts < self.config.revision_budget
280 }
281
282 pub fn compute_confidence(&self, output: &str, context: &ExecutionContext) -> f32 {
284 let weights = &self.config.factor_weights;
285 let mut score = 0.0f32;
286
287 let completeness = self.assess_completeness(output, context);
289 score += completeness * weights.completeness;
290
291 let structure = self.assess_structure(output);
293 score += structure * weights.structure;
294
295 let certainty = self.assess_certainty(output);
297 score += certainty * weights.certainty;
298
299 let relevance = self.assess_relevance(output, context);
301 score += relevance * weights.relevance;
302
303 let code_validity = self.assess_code_validity(output);
305 score += code_validity * weights.code_validity;
306
307 for (pattern, weight) in &self.learned_patterns {
309 if output.to_lowercase().contains(&pattern.to_lowercase()) {
310 score *= 1.0 - weight; }
312 }
313
314 score.clamp(0.0, 1.0)
315 }
316
317 fn assess_completeness(&self, output: &str, context: &ExecutionContext) -> f32 {
319 if output.is_empty() {
320 return 0.0;
321 }
322
323 let mut score = 0.5f32; let task_words: Vec<&str> = context.task.split_whitespace().collect();
327 let output_lower = output.to_lowercase();
328 let addressed_count = task_words
329 .iter()
330 .filter(|w| output_lower.contains(&w.to_lowercase()))
331 .count();
332 let addressed_ratio = addressed_count as f32 / task_words.len().max(1) as f32;
333 score += addressed_ratio * 0.3;
334
335 let incomplete_markers = ["TODO", "FIXME", "...", "to be continued", "incomplete"];
337 let has_incomplete = incomplete_markers
338 .iter()
339 .any(|m| output.contains(m));
340 if has_incomplete {
341 score -= 0.2;
342 }
343
344 if output.len() > 500 {
346 score += 0.1;
347 }
348 if output.len() > 1000 {
349 score += 0.1;
350 }
351
352 score.clamp(0.0, 1.0)
353 }
354
355 fn assess_structure(&self, output: &str) -> f32 {
357 if !self.config.use_structural_analysis {
358 return 0.8; }
360
361 let mut score = 0.5f32;
362
363 let has_code_blocks = output.contains("```");
365 if has_code_blocks {
366 score += 0.2;
367 }
368
369 let has_headers = output.contains("##") || output.contains("**");
371 if has_headers {
372 score += 0.1;
373 }
374
375 let has_lists = output.contains("\n- ") || output.contains("\n* ") || output.contains("\n1.");
377 if has_lists {
378 score += 0.1;
379 }
380
381 if output.len() < 50 {
383 score -= 0.2;
384 }
385
386 let line_count = output.lines().count();
388 if line_count > 5 {
389 score += 0.1;
390 }
391
392 score.clamp(0.0, 1.0)
393 }
394
395 fn assess_certainty(&self, output: &str) -> f32 {
397 let output_lower = output.to_lowercase();
398 let mut uncertainty_count = 0;
399
400 for pattern in &self.config.low_confidence_patterns {
401 if output_lower.contains(&pattern.to_lowercase()) {
402 uncertainty_count += 1;
403 }
404 }
405
406 match uncertainty_count {
408 0 => 1.0,
409 1 => 0.8,
410 2 => 0.6,
411 3 => 0.4,
412 _ => 0.2,
413 }
414 }
415
416 fn assess_relevance(&self, output: &str, context: &ExecutionContext) -> f32 {
418 let task_lower = context.task.to_lowercase();
419 let output_lower = output.to_lowercase();
420
421 let key_terms: Vec<&str> = task_lower
423 .split_whitespace()
424 .filter(|w| w.len() > 3) .collect();
426
427 if key_terms.is_empty() {
428 return 0.5;
429 }
430
431 let matched = key_terms
432 .iter()
433 .filter(|term| output_lower.contains(*term))
434 .count();
435
436 let ratio = matched as f32 / key_terms.len() as f32;
437 (ratio * 0.5 + 0.5).clamp(0.0, 1.0) }
439
440 fn assess_code_validity(&self, output: &str) -> f32 {
442 let has_code = output.contains("```") || output.contains("fn ") || output.contains("def ")
444 || output.contains("function ") || output.contains("class ");
445
446 if !has_code {
447 return 0.8; }
449
450 let mut score = 0.7f32;
451
452 let open_parens = output.matches('(').count();
454 let close_parens = output.matches(')').count();
455 let open_braces = output.matches('{').count();
456 let close_braces = output.matches('}').count();
457 let open_brackets = output.matches('[').count();
458 let close_brackets = output.matches(']').count();
459
460 if open_parens == close_parens {
461 score += 0.1;
462 } else {
463 score -= 0.2;
464 }
465
466 if open_braces == close_braces {
467 score += 0.1;
468 } else {
469 score -= 0.2;
470 }
471
472 if open_brackets == close_brackets {
473 score += 0.1;
474 } else {
475 score -= 0.1;
476 }
477
478 if output.contains("error[") || output.contains("Error:") {
480 score -= 0.3;
481 }
482
483 score.clamp(0.0, 1.0)
484 }
485
486 pub fn identify_weak_points(&self, output: &str, context: &ExecutionContext) -> Vec<WeakPoint> {
488 let mut weak_points = Vec::new();
489
490 for pattern in &self.config.low_confidence_patterns {
492 if let Some(pos) = output.to_lowercase().find(&pattern.to_lowercase()) {
493 let line_num = output[..pos].matches('\n').count() + 1;
494 weak_points.push(
495 WeakPoint::new(
496 format!("line {}", line_num),
497 format!("Uncertainty marker: '{}'", pattern),
498 0.6,
499 WeaknessType::Uncertainty,
500 )
501 .with_suggestion(format!("Remove or clarify the uncertain statement at '{}'", pattern)),
502 );
503 }
504 }
505
506 for marker in ["TODO", "FIXME", "XXX", "HACK"] {
508 if output.contains(marker) {
509 let count = output.matches(marker).count();
510 weak_points.push(
511 WeakPoint::new(
512 "multiple locations",
513 format!("Found {} {} markers", count, marker),
514 0.7,
515 WeaknessType::Incomplete,
516 )
517 .with_suggestion(format!("Address all {} items", marker)),
518 );
519 }
520 }
521
522 if output.contains("fn ") || output.contains("async fn ") {
524 if !output.contains("Result<") && !output.contains("Option<") && !output.contains("?") {
525 weak_points.push(
526 WeakPoint::new(
527 "function definitions",
528 "Functions may lack proper error handling",
529 0.5,
530 WeaknessType::MissingErrorHandling,
531 )
532 .with_suggestion("Add Result/Option return types and error propagation"),
533 );
534 }
535 }
536
537 if context.task.to_lowercase().contains("input")
539 || context.task.to_lowercase().contains("parameter")
540 {
541 if !output.to_lowercase().contains("valid")
542 && !output.to_lowercase().contains("check")
543 && !output.to_lowercase().contains("assert")
544 {
545 weak_points.push(
546 WeakPoint::new(
547 "input handling",
548 "May be missing input validation",
549 0.4,
550 WeaknessType::MissingValidation,
551 )
552 .with_suggestion("Add input validation and bounds checking"),
553 );
554 }
555 }
556
557 if context.task.to_lowercase().contains("test") {
559 if !output.contains("#[test]") && !output.contains("fn test_") {
560 weak_points.push(
561 WeakPoint::new(
562 "test coverage",
563 "No test functions found",
564 0.6,
565 WeaknessType::MissingTests,
566 )
567 .with_suggestion("Add unit tests with #[test] attribute"),
568 );
569 }
570 }
571
572 weak_points
573 }
574
575 pub fn generate_targeted_revision(
577 &self,
578 output: &str,
579 weak_points: &[WeakPoint],
580 ) -> String {
581 if weak_points.is_empty() {
582 return output.to_string();
583 }
584
585 let mut revision_prompt = String::from("Please revise the following output to address these specific issues:\n\n");
586
587 for (i, wp) in weak_points.iter().enumerate() {
588 revision_prompt.push_str(&format!(
589 "{}. [{:?}] At {}: {}\n Suggestion: {}\n\n",
590 i + 1,
591 wp.weakness_type,
592 wp.location,
593 wp.description,
594 wp.suggestion
595 ));
596 }
597
598 revision_prompt.push_str("\nOriginal output:\n");
599 revision_prompt.push_str(output);
600
601 revision_prompt
602 }
603
604 pub fn record_check(&mut self, output: &str, context: &ExecutionContext) -> ConfidenceCheckRecord {
606 let score = self.compute_confidence(output, context);
607 let level = ConfidenceLevel::from_score(score);
608 let weak_points = self.identify_weak_points(output, context);
609
610 let mut factors = HashMap::new();
611 factors.insert("completeness".to_string(), self.assess_completeness(output, context));
612 factors.insert("structure".to_string(), self.assess_structure(output));
613 factors.insert("certainty".to_string(), self.assess_certainty(output));
614 factors.insert("relevance".to_string(), self.assess_relevance(output, context));
615 factors.insert("code_validity".to_string(), self.assess_code_validity(output));
616
617 let record = ConfidenceCheckRecord {
618 score,
619 level,
620 weak_points,
621 factors,
622 task_summary: context.task.chars().take(100).collect(),
623 timestamp: std::time::SystemTime::now()
624 .duration_since(std::time::UNIX_EPOCH)
625 .map(|d| d.as_secs())
626 .unwrap_or(0),
627 };
628
629 self.check_history.push(record.clone());
630 record
631 }
632
633 pub fn learn_pattern(&mut self, pattern: String, weight: f32) {
635 self.learned_patterns.insert(pattern, weight.clamp(0.0, 1.0));
636 }
637
638 pub fn history(&self) -> &[ConfidenceCheckRecord] {
640 &self.check_history
641 }
642
643 pub fn clear_history(&mut self) {
645 self.check_history.clear();
646 }
647
648 pub fn config(&self) -> &ConfidenceConfig {
650 &self.config
651 }
652}
653
// Unit tests for confidence levels, scoring heuristics, weak-point detection and history.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::claude_flow::AgentType;

    // One representative score per band maps to the expected level.
    #[test]
    fn test_confidence_level_from_score() {
        assert_eq!(ConfidenceLevel::from_score(0.95), ConfidenceLevel::VeryHigh);
        assert_eq!(ConfidenceLevel::from_score(0.8), ConfidenceLevel::High);
        assert_eq!(ConfidenceLevel::from_score(0.6), ConfidenceLevel::Medium);
        assert_eq!(ConfidenceLevel::from_score(0.4), ConfidenceLevel::Low);
        assert_eq!(ConfidenceLevel::from_score(0.2), ConfidenceLevel::VeryLow);
    }

    // Only the two lowest levels ask for a revision.
    #[test]
    fn test_should_revise_low_levels() {
        assert!(ConfidenceLevel::Low.should_revise());
        assert!(ConfidenceLevel::VeryLow.should_revise());
        assert!(!ConfidenceLevel::Medium.should_revise());
        assert!(!ConfidenceLevel::High.should_revise());
    }

    // A checker exposes the configuration it was built with.
    #[test]
    fn test_confidence_checker_creation() {
        let config = ConfidenceConfig::default();
        let checker = ConfidenceChecker::new(config);
        assert_eq!(checker.config().threshold, 0.7);
    }

    // Empty output must score below 0.5 with the default configuration.
    #[test]
    fn test_compute_confidence_empty() {
        let checker = ConfidenceChecker::new(ConfidenceConfig::default());
        let context = ExecutionContext::new("test task", AgentType::Coder, "input");
        let confidence = checker.compute_confidence("", &context);
        assert!(confidence < 0.5);
    }

    // Hedging phrases must lower the score relative to a plain code answer.
    #[test]
    fn test_compute_confidence_with_uncertainty() {
        let checker = ConfidenceChecker::new(ConfidenceConfig::default());
        let context = ExecutionContext::new("implement function", AgentType::Coder, "input");

        let confident_output = "Here is the implementation:\n```rust\nfn example() { }\n```";
        let uncertain_output = "I'm not sure but possibly this might work...";

        let conf1 = checker.compute_confidence(confident_output, &context);
        let conf2 = checker.compute_confidence(uncertain_output, &context);

        assert!(conf1 > conf2);
    }

    // A TODO marker in the output is reported as an `Incomplete` weak point.
    #[test]
    fn test_identify_weak_points_todo() {
        let checker = ConfidenceChecker::new(ConfidenceConfig::default());
        let context = ExecutionContext::new("implement function", AgentType::Coder, "input");
        let output = "fn example() {\n // TODO: implement this\n}";

        let weak_points = checker.identify_weak_points(output, &context);
        assert!(!weak_points.is_empty());
        assert!(weak_points.iter().any(|wp| matches!(wp.weakness_type, WeaknessType::Incomplete)));
    }

    // Low-confidence output is revised until the revision budget is used up.
    #[test]
    fn test_should_revise() {
        let checker = ConfidenceChecker::new(ConfidenceConfig {
            threshold: 0.7,
            revision_budget: 3,
            ..Default::default()
        });

        let mut context = ExecutionContext::new("test", AgentType::Coder, "input");

        let low_conf_output = "I'm not sure, maybe...";
        // No previous attempts yet, so a revision is requested.
        assert!(checker.should_revise(low_conf_output, &context));

        // Record three previous attempts, which equals the budget.
        for _ in 0..3 {
            context.previous_attempts.push(crate::reflection::reflective_agent::PreviousAttempt {
                attempt_number: 1,
                output: String::new(),
                error: None,
                quality_score: None,
                duration_ms: 0,
                reflection: None,
            });
        }
        // Budget exhausted: no further revision even though confidence is still low.
        assert!(!checker.should_revise(low_conf_output, &context));
    }

    // `new` followed by `with_suggestion` keeps the location and stores the suggestion.
    #[test]
    fn test_weak_point_builder() {
        let wp = WeakPoint::new("line 5", "Missing error handling", 0.7, WeaknessType::MissingErrorHandling)
            .with_suggestion("Add Result return type");

        assert_eq!(wp.location, "line 5");
        assert!(!wp.suggestion.is_empty());
    }

    // The revision prompt contains the description, the suggestion and the original output.
    #[test]
    fn test_generate_targeted_revision() {
        let checker = ConfidenceChecker::new(ConfidenceConfig::default());
        let weak_points = vec![
            WeakPoint::new("line 1", "Issue 1", 0.5, WeaknessType::Incomplete)
                .with_suggestion("Fix it"),
        ];

        let revision = checker.generate_targeted_revision("original output", &weak_points);
        assert!(revision.contains("Issue 1"));
        assert!(revision.contains("Fix it"));
        assert!(revision.contains("original output"));
    }

    // An output matching a learned pattern scores lower than one that does not.
    #[test]
    fn test_learn_pattern() {
        let mut checker = ConfidenceChecker::new(ConfidenceConfig::default());
        checker.learn_pattern("problematic pattern".to_string(), 0.3);

        let context = ExecutionContext::new("test", AgentType::Coder, "input");
        let output_with_pattern = "This has a problematic pattern in it";
        let output_without = "This is clean code";

        let conf1 = checker.compute_confidence(output_with_pattern, &context);
        let conf2 = checker.compute_confidence(output_without, &context);

        assert!(conf1 < conf2);
    }

    // Recording a check appends to the history and fills in the factor map.
    #[test]
    fn test_record_check() {
        let mut checker = ConfidenceChecker::new(ConfidenceConfig::default());
        let context = ExecutionContext::new("test task", AgentType::Coder, "input");

        let record = checker.record_check("test output", &context);

        assert!(!checker.history().is_empty());
        assert!(record.factors.contains_key("completeness"));
    }
}