reasonkit/thinktool/modules/
brutalhonesty.rs

1//! BrutalHonesty Module - Adversarial Self-Critique
2//!
3//! Red-team analysis that finds flaws before others do, challenges
4//! assumptions aggressively, and scores confidence with skepticism.
5//!
6//! ## Design Philosophy
7//!
8//! BrutalHonesty applies adversarial thinking to identify weaknesses:
9//! - **Assumption Hunter**: Extracts implicit assumptions and questions them
10//! - **Flaw Finder**: Categorizes weaknesses by severity and type
11//! - **Skeptical Scorer**: Adjusts confidence downward based on issues found
12//! - **Devil's Advocate**: Argues against the position to stress-test it
13//!
14//! ## Usage
15//!
16//! ```ignore
//! use reasonkit::thinktool::modules::brutalhonesty::CritiqueSeverity;
//! use reasonkit::thinktool::modules::{BrutalHonesty, ThinkToolContext, ThinkToolModule};
18//!
19//! let module = BrutalHonesty::builder()
20//!     .severity(CritiqueSeverity::Ruthless)
21//!     .enable_devil_advocate(true)
22//!     .build();
23//!
24//! let context = ThinkToolContext {
25//!     query: "Our startup will succeed because we have the best team".to_string(),
26//!     previous_steps: vec![],
27//! };
28//!
29//! let result = module.execute(&context)?;
30//! ```
31
32use super::{ThinkToolContext, ThinkToolModule, ThinkToolModuleConfig, ThinkToolOutput};
33use crate::error::{Error, Result};
34use serde::{Deserialize, Serialize};
35use serde_json::json;
36
37/// Severity level for critique analysis.
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
39#[serde(rename_all = "snake_case")]
40pub enum CritiqueSeverity {
41    /// Gentle critique - focus on constructive feedback
42    Gentle,
43    /// Standard critique - balanced flaw detection
44    #[default]
45    Standard,
46    /// Harsh critique - aggressive assumption challenging
47    Harsh,
48    /// Ruthless critique - no mercy, find every possible flaw
49    Ruthless,
50}
51
52impl CritiqueSeverity {
53    /// Get the skepticism multiplier for confidence scoring.
54    /// Higher skepticism = lower confidence adjustments.
55    fn skepticism_multiplier(&self) -> f64 {
56        match self {
57            Self::Gentle => 0.90,   // 10% skepticism reduction
58            Self::Standard => 0.80, // 20% skepticism reduction
59            Self::Harsh => 0.65,    // 35% skepticism reduction
60            Self::Ruthless => 0.50, // 50% skepticism reduction
61        }
62    }
63}
64
65/// Category of detected flaw.
66#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
67#[serde(rename_all = "snake_case")]
68pub enum FlawCategory {
69    /// Logical flaw (fallacy, contradiction, non-sequitur)
70    Logical,
71    /// Evidential flaw (missing data, weak sources, cherry-picking)
72    Evidential,
73    /// Assumption flaw (unexamined premises, hidden biases)
74    Assumption,
75    /// Scope flaw (overgeneralization, false dichotomy)
76    Scope,
77    /// Temporal flaw (recency bias, ignoring history)
78    Temporal,
79    /// Adversarial flaw (vulnerability to counter-arguments)
80    Adversarial,
81    /// Completeness flaw (missing considerations, blind spots)
82    Completeness,
83}
84
85/// Severity of a detected flaw.
86#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
87#[serde(rename_all = "snake_case")]
88pub enum FlawSeverity {
89    /// Minor issue - worth noting but not critical
90    Minor,
91    /// Moderate issue - should be addressed
92    Moderate,
93    /// Major issue - significantly weakens the argument
94    Major,
95    /// Critical issue - fundamentally undermines the position
96    Critical,
97}
98
99impl FlawSeverity {
100    /// Get the confidence penalty for this severity level.
101    fn confidence_penalty(&self) -> f64 {
102        match self {
103            Self::Minor => 0.02,
104            Self::Moderate => 0.08,
105            Self::Major => 0.15,
106            Self::Critical => 0.30,
107        }
108    }
109}
110
111/// A detected flaw in the reasoning.
112#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct DetectedFlaw {
114    /// Category of the flaw
115    pub category: FlawCategory,
116    /// Severity of the flaw
117    pub severity: FlawSeverity,
118    /// Description of the flaw
119    pub description: String,
120    /// The specific text or aspect that triggered this flaw
121    pub trigger: Option<String>,
122    /// Suggested remediation
123    pub remediation: Option<String>,
124}
125
126impl DetectedFlaw {
127    /// Create a new detected flaw.
128    pub fn new(
129        category: FlawCategory,
130        severity: FlawSeverity,
131        description: impl Into<String>,
132    ) -> Self {
133        Self {
134            category,
135            severity,
136            description: description.into(),
137            trigger: None,
138            remediation: None,
139        }
140    }
141
142    /// Add a trigger to the flaw.
143    pub fn with_trigger(mut self, trigger: impl Into<String>) -> Self {
144        self.trigger = Some(trigger.into());
145        self
146    }
147
148    /// Add a remediation suggestion.
149    pub fn with_remediation(mut self, remediation: impl Into<String>) -> Self {
150        self.remediation = Some(remediation.into());
151        self
152    }
153}
154
155/// An implicit assumption detected in the reasoning.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct ImplicitAssumption {
158    /// The assumption itself
159    pub assumption: String,
160    /// How confident we are this is an implicit assumption (0.0-1.0)
161    pub confidence: f64,
162    /// Why this assumption may be problematic
163    pub risk: String,
164    /// Whether this assumption is likely valid
165    pub likely_valid: bool,
166}
167
168/// A strength identified in the reasoning.
169#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct IdentifiedStrength {
171    /// Description of the strength
172    pub description: String,
173    /// How significant this strength is (0.0-1.0)
174    pub significance: f64,
175}
176
177/// Overall verdict from the brutal honesty analysis.
178#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
179#[serde(rename_all = "snake_case")]
180pub enum CritiqueVerdict {
181    /// The reasoning is solid and defensible
182    Solid,
183    /// The reasoning has merit but needs improvement
184    Promising,
185    /// The reasoning has significant issues
186    Weak,
187    /// The reasoning is fundamentally flawed
188    Flawed,
189    /// Cannot assess due to insufficient information
190    Indeterminate,
191}
192
193/// Configuration for the BrutalHonesty module.
194#[derive(Debug, Clone, Serialize, Deserialize)]
195pub struct BrutalHonestyConfig {
196    /// Severity level for critique
197    pub severity: CritiqueSeverity,
198    /// Whether to enable devil's advocate mode
199    pub enable_devil_advocate: bool,
200    /// Whether to check for confirmation bias
201    pub check_confirmation_bias: bool,
202    /// Minimum confidence threshold (below this triggers warning)
203    pub min_confidence_threshold: f64,
204    /// Maximum number of flaws to report
205    pub max_flaws_reported: usize,
206    /// Focus areas for critique (empty = all areas)
207    pub focus_areas: Vec<FlawCategory>,
208}
209
210impl Default for BrutalHonestyConfig {
211    fn default() -> Self {
212        Self {
213            severity: CritiqueSeverity::Standard,
214            enable_devil_advocate: true,
215            check_confirmation_bias: true,
216            min_confidence_threshold: 0.50,
217            max_flaws_reported: 10,
218            focus_areas: vec![],
219        }
220    }
221}
222
223/// Builder for BrutalHonesty module configuration.
224#[derive(Debug, Default)]
225pub struct BrutalHonestyBuilder {
226    config: BrutalHonestyConfig,
227}
228
229impl BrutalHonestyBuilder {
230    /// Create a new builder with default configuration.
231    pub fn new() -> Self {
232        Self::default()
233    }
234
235    /// Set the critique severity level.
236    pub fn severity(mut self, severity: CritiqueSeverity) -> Self {
237        self.config.severity = severity;
238        self
239    }
240
241    /// Enable or disable devil's advocate mode.
242    pub fn enable_devil_advocate(mut self, enable: bool) -> Self {
243        self.config.enable_devil_advocate = enable;
244        self
245    }
246
247    /// Enable or disable confirmation bias checking.
248    pub fn check_confirmation_bias(mut self, enable: bool) -> Self {
249        self.config.check_confirmation_bias = enable;
250        self
251    }
252
253    /// Set the minimum confidence threshold.
254    pub fn min_confidence_threshold(mut self, threshold: f64) -> Self {
255        self.config.min_confidence_threshold = threshold.clamp(0.0, 1.0);
256        self
257    }
258
259    /// Set the maximum number of flaws to report.
260    pub fn max_flaws_reported(mut self, max: usize) -> Self {
261        self.config.max_flaws_reported = max;
262        self
263    }
264
265    /// Set focus areas for critique.
266    pub fn focus_areas(mut self, areas: Vec<FlawCategory>) -> Self {
267        self.config.focus_areas = areas;
268        self
269    }
270
271    /// Build the BrutalHonesty module.
272    pub fn build(self) -> BrutalHonesty {
273        BrutalHonesty::with_config(self.config)
274    }
275}
276
/// BrutalHonesty reasoning module for adversarial critique.
///
/// Attacks ideas to identify weaknesses, challenges assumptions,
/// and scores confidence with appropriate skepticism.
///
/// Create one with [`BrutalHonesty::new`], [`BrutalHonesty::with_config`] or
/// [`BrutalHonesty::builder`].
pub struct BrutalHonesty {
    /// Base module metadata (name, version, description, confidence weight)
    /// returned by `ThinkToolModule::config`.
    module_config: ThinkToolModuleConfig,
    /// BrutalHonesty-specific settings that drive the critique heuristics.
    config: BrutalHonestyConfig,
}
287
288impl Default for BrutalHonesty {
289    fn default() -> Self {
290        Self::new()
291    }
292}
293
294impl BrutalHonesty {
295    /// Create a new BrutalHonesty module with default configuration.
296    pub fn new() -> Self {
297        Self::with_config(BrutalHonestyConfig::default())
298    }
299
300    /// Create a BrutalHonesty module with custom configuration.
301    pub fn with_config(config: BrutalHonestyConfig) -> Self {
302        Self {
303            module_config: ThinkToolModuleConfig {
304                name: "BrutalHonesty".to_string(),
305                version: "3.0.0".to_string(),
306                description: "Adversarial self-critique with skeptical confidence scoring"
307                    .to_string(),
308                confidence_weight: 0.15,
309            },
310            config,
311        }
312    }
313
314    /// Create a builder for customizing the module.
315    pub fn builder() -> BrutalHonestyBuilder {
316        BrutalHonestyBuilder::new()
317    }
318
319    /// Get the current configuration.
320    pub fn brutal_config(&self) -> &BrutalHonestyConfig {
321        &self.config
322    }
323
324    /// Analyze the input for implicit assumptions.
325    fn extract_assumptions(&self, query: &str) -> Vec<ImplicitAssumption> {
326        let mut assumptions = Vec::new();
327
328        // Pattern-based assumption detection
329        let assumption_patterns = [
330            ("will", "Assumes future outcome is certain", 0.75),
331            ("always", "Assumes universal applicability", 0.80),
332            ("never", "Assumes absolute exclusion", 0.80),
333            ("everyone", "Assumes universal agreement", 0.85),
334            ("obvious", "Assumes shared understanding", 0.70),
335            ("clearly", "Assumes self-evidence", 0.65),
336            ("best", "Assumes optimal status without comparison", 0.60),
337            ("only", "Assumes exclusivity", 0.70),
338            ("must", "Assumes necessity without justification", 0.65),
339            ("should", "Assumes normative position", 0.55),
340            ("need", "Assumes requirement without evidence", 0.60),
341            ("because", "May assume causation from correlation", 0.50),
342        ];
343
344        let query_lower = query.to_lowercase();
345
346        for (pattern, risk, confidence) in assumption_patterns {
347            if query_lower.contains(pattern) {
348                assumptions.push(ImplicitAssumption {
349                    assumption: format!(
350                        "Use of '{}' implies unstated certainty or universality",
351                        pattern
352                    ),
353                    confidence,
354                    risk: risk.to_string(),
355                    likely_valid: confidence < 0.65,
356                });
357            }
358        }
359
360        // Check for causal language without evidence
361        if query_lower.contains("because")
362            && !query_lower.contains("data")
363            && !query_lower.contains("evidence")
364            && !query_lower.contains("study")
365            && !query_lower.contains("research")
366        {
367            assumptions.push(ImplicitAssumption {
368                assumption: "Causal claim made without citing evidence".to_string(),
369                confidence: 0.70,
370                risk: "Causation may be assumed from correlation".to_string(),
371                likely_valid: false,
372            });
373        }
374
375        // Check for value judgments
376        let value_words = ["good", "bad", "right", "wrong", "better", "worse"];
377        for word in value_words {
378            if query_lower.contains(word) {
379                assumptions.push(ImplicitAssumption {
380                    assumption: format!("Value judgment '{}' assumes shared moral framework", word),
381                    confidence: 0.55,
382                    risk: "Value judgments may not be universally shared".to_string(),
383                    likely_valid: true, // Values can be valid but should be acknowledged
384                });
385                break; // Only report once
386            }
387        }
388
389        assumptions
390    }
391
392    /// Detect flaws in the reasoning.
393    fn detect_flaws(&self, query: &str, previous_steps: &[String]) -> Vec<DetectedFlaw> {
394        let mut flaws = Vec::new();
395        let query_lower = query.to_lowercase();
396        let query_len = query.len();
397
398        // Check for overgeneralization
399        let universal_quantifiers = ["all", "every", "always", "never", "none", "no one"];
400        for quantifier in universal_quantifiers {
401            if query_lower.contains(quantifier) {
402                flaws.push(
403                    DetectedFlaw::new(
404                        FlawCategory::Scope,
405                        FlawSeverity::Moderate,
406                        format!(
407                            "Universal quantifier '{}' may indicate overgeneralization",
408                            quantifier
409                        ),
410                    )
411                    .with_remediation("Consider whether there are exceptions or edge cases"),
412                );
413            }
414        }
415
416        // Check for appeal to authority without specifics
417        if (query_lower.contains("expert")
418            || query_lower.contains("studies show")
419            || query_lower.contains("research shows"))
420            && !query_lower.contains("according to")
421            && !query_lower.contains("published")
422        {
423            flaws.push(
424                DetectedFlaw::new(
425                    FlawCategory::Evidential,
426                    FlawSeverity::Moderate,
427                    "Vague appeal to authority without specific citation",
428                )
429                .with_remediation("Cite specific sources, authors, or publications"),
430            );
431        }
432
433        // Check for false dichotomy
434        if query_lower.contains("either") && query_lower.contains("or") {
435            flaws.push(
436                DetectedFlaw::new(
437                    FlawCategory::Logical,
438                    FlawSeverity::Moderate,
439                    "Either/or construction may present false dichotomy",
440                )
441                .with_trigger("either...or")
442                .with_remediation("Consider whether other alternatives exist"),
443            );
444        }
445
446        // Check for recency bias
447        if query_lower.contains("nowadays")
448            || query_lower.contains("these days")
449            || query_lower.contains("modern")
450        {
451            flaws.push(
452                DetectedFlaw::new(
453                    FlawCategory::Temporal,
454                    FlawSeverity::Minor,
455                    "Temporal framing may indicate recency bias",
456                )
457                .with_remediation("Consider historical patterns and whether 'new' means 'better'"),
458            );
459        }
460
461        // Check for emotional language that may cloud reasoning
462        let emotional_words = [
463            "amazing",
464            "terrible",
465            "disaster",
466            "revolutionary",
467            "incredible",
468            "horrible",
469            "catastrophic",
470            "miraculous",
471            "devastating",
472        ];
473        for word in emotional_words {
474            if query_lower.contains(word) {
475                flaws.push(
476                    DetectedFlaw::new(
477                        FlawCategory::Logical,
478                        FlawSeverity::Minor,
479                        format!("Emotional language '{}' may indicate bias", word),
480                    )
481                    .with_trigger(word)
482                    .with_remediation("Replace emotional terms with factual descriptions"),
483                );
484                break;
485            }
486        }
487
488        // Check for lack of counter-arguments
489        let counter_arg_indicators = [
490            "however",
491            "although",
492            "but",
493            "on the other hand",
494            "conversely",
495        ];
496        let has_counter = counter_arg_indicators
497            .iter()
498            .any(|ind| query_lower.contains(ind));
499
500        if !has_counter && query_len > 100 {
501            flaws.push(
502                DetectedFlaw::new(
503                    FlawCategory::Completeness,
504                    FlawSeverity::Major,
505                    "No counter-arguments or alternative viewpoints presented",
506                )
507                .with_remediation("Steel-man opposing positions before dismissing them"),
508            );
509        }
510
511        // Check for consistency with previous steps
512        if !previous_steps.is_empty() {
513            let prev_combined = previous_steps.join(" ").to_lowercase();
514
515            // Look for potential contradictions
516            if (query_lower.contains("not") && !prev_combined.contains("not"))
517                || (!query_lower.contains("not") && prev_combined.contains("not "))
518            {
519                flaws.push(
520                    DetectedFlaw::new(
521                        FlawCategory::Logical,
522                        FlawSeverity::Moderate,
523                        "Potential inconsistency detected with previous reasoning steps",
524                    )
525                    .with_remediation("Review previous steps for logical consistency"),
526                );
527            }
528        }
529
530        // Check for vague claims
531        let vague_indicators = ["somewhat", "kind of", "sort of", "basically", "essentially"];
532        for vague in vague_indicators {
533            if query_lower.contains(vague) {
534                flaws.push(
535                    DetectedFlaw::new(
536                        FlawCategory::Evidential,
537                        FlawSeverity::Minor,
538                        format!("Vague qualifier '{}' reduces precision", vague),
539                    )
540                    .with_trigger(vague)
541                    .with_remediation("Be more specific and precise in claims"),
542                );
543            }
544        }
545
546        // Check for confirmation bias indicators
547        if self.config.check_confirmation_bias
548            && query_lower.contains("proves")
549            && !query_lower.contains("disproves")
550        {
551            flaws.push(
552                DetectedFlaw::new(
553                    FlawCategory::Assumption,
554                    FlawSeverity::Moderate,
555                    "One-sided evidence presentation may indicate confirmation bias",
556                )
557                .with_remediation("Actively seek disconfirming evidence"),
558            );
559        }
560
561        // Limit flaws to configured maximum
562        if flaws.len() > self.config.max_flaws_reported {
563            // Sort by severity (most severe first) and take top N
564            flaws.sort_by(|a, b| b.severity.cmp(&a.severity));
565            flaws.truncate(self.config.max_flaws_reported);
566        }
567
568        flaws
569    }
570
571    /// Identify strengths in the reasoning.
572    fn identify_strengths(&self, query: &str) -> Vec<IdentifiedStrength> {
573        let mut strengths = Vec::new();
574        let query_lower = query.to_lowercase();
575
576        // Check for evidence-based reasoning
577        if query_lower.contains("data")
578            || query_lower.contains("evidence")
579            || query_lower.contains("study")
580            || query_lower.contains("research")
581        {
582            strengths.push(IdentifiedStrength {
583                description: "References to data or evidence support claims".to_string(),
584                significance: 0.75,
585            });
586        }
587
588        // Check for nuanced language
589        if query_lower.contains("however")
590            || query_lower.contains("although")
591            || query_lower.contains("on the other hand")
592        {
593            strengths.push(IdentifiedStrength {
594                description: "Acknowledges counter-arguments or nuance".to_string(),
595                significance: 0.70,
596            });
597        }
598
599        // Check for specific examples
600        if query_lower.contains("for example")
601            || query_lower.contains("for instance")
602            || query_lower.contains("specifically")
603        {
604            strengths.push(IdentifiedStrength {
605                description: "Uses specific examples to support arguments".to_string(),
606                significance: 0.65,
607            });
608        }
609
610        // Check for qualified claims
611        if query_lower.contains("likely")
612            || query_lower.contains("probably")
613            || query_lower.contains("may")
614            || query_lower.contains("might")
615        {
616            strengths.push(IdentifiedStrength {
617                description: "Uses appropriate epistemic qualifiers".to_string(),
618                significance: 0.60,
619            });
620        }
621
622        // Check for structured reasoning
623        if query_lower.contains("first")
624            || query_lower.contains("second")
625            || query_lower.contains("finally")
626            || query_lower.contains("therefore")
627        {
628            strengths.push(IdentifiedStrength {
629                description: "Demonstrates structured reasoning approach".to_string(),
630                significance: 0.55,
631            });
632        }
633
634        strengths
635    }
636
637    /// Generate a devil's advocate counter-argument.
638    fn devils_advocate(&self, query: &str) -> Option<String> {
639        if !self.config.enable_devil_advocate {
640            return None;
641        }
642
643        let query_lower = query.to_lowercase();
644
645        // Generate contextual counter-arguments
646        if query_lower.contains("will succeed") || query_lower.contains("will work") {
647            Some("What if the underlying assumptions about market conditions, timing, or execution are wrong? What specific failure modes have been considered?".to_string())
648        } else if query_lower.contains("best") {
649            Some("By what criteria is 'best' defined? Have alternatives been fairly evaluated? Could 'best' be contingent on circumstances?".to_string())
650        } else if query_lower.contains("everyone") || query_lower.contains("all") {
651            Some("Are there exceptions or edge cases being overlooked? Is this universality actually validated by data?".to_string())
652        } else if query_lower.contains("obvious") || query_lower.contains("clearly") {
653            Some("What appears obvious from one perspective may not be from another. Have blind spots been systematically checked?".to_string())
654        } else {
655            Some("What is the strongest argument against this position? What would make this claim false?".to_string())
656        }
657    }
658
659    /// Calculate skeptical confidence score.
660    fn calculate_skeptical_confidence(
661        &self,
662        flaws: &[DetectedFlaw],
663        strengths: &[IdentifiedStrength],
664    ) -> f64 {
665        // Start with base confidence
666        let mut confidence = 0.75;
667
668        // Apply flaw penalties
669        for flaw in flaws {
670            confidence -= flaw.severity.confidence_penalty();
671        }
672
673        // Apply strength bonuses (but with diminishing returns)
674        let mut strength_bonus = 0.0;
675        for (i, strength) in strengths.iter().enumerate() {
676            // Diminishing returns: each subsequent strength adds less
677            let diminish_factor = 1.0 / (1.0 + i as f64 * 0.5);
678            strength_bonus += strength.significance * 0.1 * diminish_factor;
679        }
680        confidence += strength_bonus;
681
682        // Apply severity-based skepticism multiplier
683        confidence *= self.config.severity.skepticism_multiplier();
684
685        // Clamp to valid range
686        confidence.clamp(0.0, 0.95) // Never return 1.0 - always leave room for doubt
687    }
688
689    /// Determine the overall verdict.
690    fn determine_verdict(&self, confidence: f64, flaws: &[DetectedFlaw]) -> CritiqueVerdict {
691        // Count critical and major flaws
692        let critical_count = flaws
693            .iter()
694            .filter(|f| f.severity == FlawSeverity::Critical)
695            .count();
696        let major_count = flaws
697            .iter()
698            .filter(|f| f.severity == FlawSeverity::Major)
699            .count();
700
701        if critical_count > 0 {
702            return CritiqueVerdict::Flawed;
703        }
704
705        if major_count >= 3 || confidence < 0.30 {
706            return CritiqueVerdict::Flawed;
707        }
708
709        if major_count >= 1 || confidence < 0.50 {
710            return CritiqueVerdict::Weak;
711        }
712
713        if flaws.len() >= 3 || confidence < 0.70 {
714            return CritiqueVerdict::Promising;
715        }
716
717        if confidence >= 0.70 && flaws.len() <= 2 {
718            return CritiqueVerdict::Solid;
719        }
720
721        CritiqueVerdict::Promising
722    }
723
724    /// Generate the most critical fix recommendation.
725    fn critical_fix(&self, flaws: &[DetectedFlaw]) -> Option<String> {
726        // Find the most severe flaw
727        flaws.iter().max_by_key(|f| &f.severity).and_then(|f| {
728            f.remediation.clone().or_else(|| {
729                Some(format!(
730                    "Address {} issue: {}",
731                    match f.category {
732                        FlawCategory::Logical => "logical",
733                        FlawCategory::Evidential => "evidential",
734                        FlawCategory::Assumption => "assumption",
735                        FlawCategory::Scope => "scope",
736                        FlawCategory::Temporal => "temporal",
737                        FlawCategory::Adversarial => "adversarial",
738                        FlawCategory::Completeness => "completeness",
739                    },
740                    f.description
741                ))
742            })
743        })
744    }
745}
746
747impl ThinkToolModule for BrutalHonesty {
748    fn config(&self) -> &ThinkToolModuleConfig {
749        &self.module_config
750    }
751
752    fn execute(&self, context: &ThinkToolContext) -> Result<ThinkToolOutput> {
753        // Validate input
754        if context.query.trim().is_empty() {
755            return Err(Error::validation(
756                "BrutalHonesty requires non-empty query input",
757            ));
758        }
759
760        // Perform adversarial analysis
761        let assumptions = self.extract_assumptions(&context.query);
762        let flaws = self.detect_flaws(&context.query, &context.previous_steps);
763        let strengths = self.identify_strengths(&context.query);
764        let devils_advocate = self.devils_advocate(&context.query);
765
766        // Calculate skeptical confidence
767        let confidence = self.calculate_skeptical_confidence(&flaws, &strengths);
768
769        // Determine verdict
770        let verdict = self.determine_verdict(confidence, &flaws);
771
772        // Get critical fix
773        let critical_fix = self.critical_fix(&flaws);
774
775        // Build confidence warning
776        let confidence_warning = if confidence < self.config.min_confidence_threshold {
777            Some(format!(
778                "Confidence {:.0}% is below threshold {:.0}%",
779                confidence * 100.0,
780                self.config.min_confidence_threshold * 100.0
781            ))
782        } else {
783            None
784        };
785
786        // Construct output
787        let output = json!({
788            "verdict": verdict,
789            "confidence": confidence,
790            "confidence_warning": confidence_warning,
791            "severity_applied": format!("{:?}", self.config.severity),
792            "analysis": {
793                "assumptions": assumptions,
794                "flaws": flaws,
795                "strengths": strengths,
796                "flaw_count": flaws.len(),
797                "strength_count": strengths.len(),
798            },
799            "devils_advocate": devils_advocate,
800            "critical_fix": critical_fix,
801            "metadata": {
802                "input_length": context.query.len(),
803                "previous_steps_count": context.previous_steps.len(),
804                "skepticism_multiplier": self.config.severity.skepticism_multiplier(),
805            }
806        });
807
808        Ok(ThinkToolOutput {
809            module: self.module_config.name.clone(),
810            confidence,
811            output,
812        })
813    }
814}
815
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a context carrying only `query`, with no previous steps.
    fn context_for(query: &str) -> ThinkToolContext {
        ThinkToolContext {
            query: query.to_string(),
            previous_steps: Vec::new(),
        }
    }

    /// A module built with `new()` reports the expected name, version and default severity.
    #[test]
    fn test_default_module() {
        let module = BrutalHonesty::new();
        let module_config = module.config();

        assert_eq!(module_config.name, "BrutalHonesty");
        assert_eq!(module_config.version, "3.0.0");
        assert_eq!(module.brutal_config().severity, CritiqueSeverity::Standard);
    }

    /// Every value handed to the builder is readable back from the built module.
    #[test]
    fn test_builder_pattern() {
        let module = BrutalHonesty::builder()
            .severity(CritiqueSeverity::Ruthless)
            .enable_devil_advocate(false)
            .min_confidence_threshold(0.60)
            .build();
        let settings = module.brutal_config();

        assert_eq!(settings.severity, CritiqueSeverity::Ruthless);
        assert!(!settings.enable_devil_advocate);

        // Floating-point comparison with a small tolerance instead of exact equality.
        let threshold_gap = (settings.min_confidence_threshold - 0.60).abs();
        assert!(threshold_gap < 0.001);
    }

    /// At least one extracted assumption mentions one of the words "will", "always" or "best".
    #[test]
    fn test_assumption_extraction() {
        let module = BrutalHonesty::new();
        let found = module.extract_assumptions("Our product will always be the best");

        assert!(!found.is_empty());

        let mentions_trigger = found.iter().any(|item| {
            ["will", "always", "best"]
                .iter()
                .any(|word| item.assumption.contains(*word))
        });
        assert!(mentions_trigger);
    }

    /// A statement mixing an either/or framing with a sweeping claim yields both
    /// a `Logical` and a `Scope` flaw.
    #[test]
    fn test_flaw_detection() {
        let module = BrutalHonesty::new();
        let input = "Either we succeed or we fail completely. All experts agree this is amazing.";
        let flaws = module.detect_flaws(input, &[]);

        assert!(!flaws.is_empty());

        let reports = |wanted: FlawCategory| flaws.iter().any(|flaw| flaw.category == wanted);

        // The false dichotomy is expected to surface as a logical flaw.
        assert!(reports(FlawCategory::Logical));

        // The overgeneralization is expected to surface as a scope flaw.
        assert!(reports(FlawCategory::Scope));
    }

    /// Text containing several positive markers produces at least two strengths.
    #[test]
    fn test_strength_identification() {
        let module = BrutalHonesty::new();
        let text = "The data shows, for example, that our approach is likely effective. However, there are limitations.";
        let strengths = module.identify_strengths(text);

        assert!(!strengths.is_empty());
        // Candidate markers in the text: data, for example, however, likely.
        assert!(strengths.len() >= 2);
    }

    /// A major flaw pushes confidence below 0.60, and a strength-only input scores higher.
    #[test]
    fn test_skeptical_confidence() {
        let module = BrutalHonesty::new();

        // One major flaw and no strengths: the score must be reduced significantly.
        let major_flaw = DetectedFlaw::new(FlawCategory::Logical, FlawSeverity::Major, "Test flaw");
        let penalized = module.calculate_skeptical_confidence(&[major_flaw], &[]);
        assert!(penalized < 0.60);

        // No flaws and one strength: the score must exceed the penalized one.
        let strength = IdentifiedStrength {
            description: "Test strength".to_string(),
            significance: 0.8,
        };
        let boosted = module.calculate_skeptical_confidence(&[], &[strength]);
        assert!(boosted > penalized);
    }

    /// A critical flaw yields `Flawed`; high confidence without flaws yields `Solid`.
    #[test]
    fn test_verdict_determination() {
        let module = BrutalHonesty::new();

        let critical = DetectedFlaw::new(
            FlawCategory::Logical,
            FlawSeverity::Critical,
            "Critical issue",
        );
        assert_eq!(
            module.determine_verdict(0.5, &[critical]),
            CritiqueVerdict::Flawed
        );

        assert_eq!(module.determine_verdict(0.85, &[]), CritiqueVerdict::Solid);
    }

    /// Executing on an empty query is an error.
    #[test]
    fn test_execute_empty_input() {
        let module = BrutalHonesty::new();
        let outcome = module.execute(&context_for(""));

        assert!(outcome.is_err());
    }

    /// A normal query produces a bounded confidence and the expected top-level output keys.
    #[test]
    fn test_execute_valid_input() {
        let module = BrutalHonesty::new();
        let context = context_for("Our startup will succeed because we have the best team");

        let result = module.execute(&context).unwrap();
        assert_eq!(result.module, "BrutalHonesty");

        let score = result.confidence;
        assert!(score > 0.0);
        assert!(score <= 0.95);

        // Check output structure.
        for key in ["verdict", "analysis", "devils_advocate"] {
            assert!(result.output.get(key).is_some());
        }
    }

    /// The same query scores lower under `Ruthless` than under `Gentle`.
    #[test]
    fn test_severity_affects_confidence() {
        let context = context_for("This approach will work well");

        // Builds a module at the given severity and returns its confidence for `context`.
        let confidence_at = |severity: CritiqueSeverity| {
            BrutalHonesty::builder()
                .severity(severity)
                .build()
                .execute(&context)
                .unwrap()
                .confidence
        };

        let gentle = confidence_at(CritiqueSeverity::Gentle);
        let ruthless = confidence_at(CritiqueSeverity::Ruthless);

        assert!(ruthless < gentle);
    }

    /// The devil's advocate returns a question-bearing counter-argument by default
    /// and nothing when disabled through the builder.
    #[test]
    fn test_devils_advocate() {
        let claim = "We will succeed";

        // Default module: a counter-argument containing a question mark is expected.
        let enabled = BrutalHonesty::new();
        assert!(matches!(
            enabled.devils_advocate(claim),
            Some(text) if text.contains("?")
        ));

        // Devil's advocate switched off: no counter-argument at all.
        let disabled = BrutalHonesty::builder()
            .enable_devil_advocate(false)
            .build();
        assert!(disabled.devils_advocate(claim).is_none());
    }

    /// Severities compare as Critical > Major > Moderate > Minor.
    #[test]
    fn test_flaw_severity_ordering() {
        let ranked = [
            FlawSeverity::Critical,
            FlawSeverity::Major,
            FlawSeverity::Moderate,
            FlawSeverity::Minor,
        ];

        // Each severity must rank strictly above the one listed after it.
        for pair in ranked.windows(2) {
            assert!(pair[0] > pair[1]);
        }
    }

    /// `max_flaws_reported(2)` caps the number of flaws returned by detection.
    #[test]
    fn test_max_flaws_limit() {
        let module = BrutalHonesty::builder().max_flaws_reported(2).build();

        // Input with many potential flaws.
        let loaded_claim = "All experts always agree that this amazing product will never fail because it's obviously the best";
        let flaws = module.detect_flaws(loaded_claim, &[]);

        assert!(flaws.len() <= 2);
    }
}
reasonkit/thinktool/modules/brutalhonesty.rs

reasonkit/thinktool/modules/
brutalhonesty.rs