reasonkit-core 0.1.8

The Reasoning Engine — Auditable Reasoning for Production AI | Rust-Native | Turn Prompts into Protocols
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
//! Edge Case Generator for ThinkTool Protocols
//!
//! This module generates boundary condition test cases for ThinkTool protocols,
//! focusing on infinite recursion, circular logic, and stack overflow scenarios.
//!
//! ## Boundary Conditions Identified:
//!
//! ### Infinite Recursion Scenarios:
//! - Self-refine loops that never converge (max_iterations reached)
//! - Circular dependencies in BedRock principle reconstruction
//! - Protocol steps that reference each other cyclically
//! - Oscillation cycles that don't terminate
//!
//! ### Circular Logic Scenarios:
//! - LaserLogic circular reasoning detection
//! - Arguments where premises restate the conclusion
//! - Validation loops where output validates itself
//!
//! ### Stack Overflow Scenarios:
//! - Deep recursion in principle chains
//! - Very long reasoning chains in protocols
//! - Nested protocol execution
//! - Deep synthesis of many perspectives
//!
//! ### Other Boundary Conditions:
//! - Query too short/long limits
//! - Insufficient perspectives generated
//! - Confidence thresholds not met
//! - Cross-validation failures
//! - Memory exhaustion from large datasets

use super::modules::ThinkToolContext;
use crate::error::{Error, Result};
use serde::{Deserialize, Serialize};

/// Types of edge cases that can occur in ThinkTool protocols.
///
/// The enum is `Copy`, comparable with `==`, hashable, and serde-serializable
/// (all via the derives below), so values can be collected into hash sets and
/// written out with the rest of a test case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EdgeCaseType {
    /// Infinite recursion in self-refinement loops
    /// (used by the cross-protocol "SelfRefine" and "Oscillation" cases).
    InfiniteRecursion,
    /// Circular dependencies between reasoning steps
    /// (used by the BedRock and BrutalHonesty cases).
    CircularDependency,
    /// Deep recursion causing stack overflow
    /// (used by the LaserLogic and BedRock deep-structure cases).
    StackOverflow,
    /// Circular reasoning in logical arguments
    CircularReasoning,
    /// Query input too short for processing
    QueryTooShort,
    /// Query input too long for processing
    QueryTooLong,
    /// Insufficient perspectives generated
    InsufficientPerspectives,
    /// Confidence below acceptable threshold.
    ///
    /// NOTE(review): no generator visible in this file emits this variant.
    LowConfidence,
    /// Cross-validation failure between perspectives.
    ///
    /// NOTE(review): no generator visible in this file emits this variant.
    CrossValidationFailure,
    /// Memory exhaustion from large datasets
    MemoryExhaustion,
    /// Timeout during execution
    ExecutionTimeout,
}

/// Configuration for edge case generation.
///
/// The `Default` implementation yields a recursion depth of 100, a query
/// length of 10000, a perspective threshold of 3, a confidence threshold of
/// 0.3, and both optional test groups disabled.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeCaseGeneratorConfig {
    /// Maximum depth for recursive structures
    pub max_recursion_depth: usize,
    /// Maximum length for generated queries.
    ///
    /// NOTE(review): no generator visible in this file reads this value; the
    /// long-query case uses a fixed repeat count — confirm the intended use.
    pub max_query_length: usize,
    /// Minimum perspectives to consider "insufficient".
    ///
    /// NOTE(review): not read by any generator visible in this file.
    pub min_perspectives_threshold: usize,
    /// Confidence threshold for low confidence cases.
    ///
    /// NOTE(review): not read by any generator visible in this file.
    pub low_confidence_threshold: f64,
    /// Whether to include memory-intensive test cases.
    ///
    /// When `true`, the cross-protocol generator adds a `MemoryExhaustion`
    /// case targeting "Any".
    pub include_memory_tests: bool,
    /// Whether to include timeout test cases.
    ///
    /// When `true`, the cross-protocol generator adds an `ExecutionTimeout`
    /// case targeting "Any".
    pub include_timeout_tests: bool,
}

impl Default for EdgeCaseGeneratorConfig {
    /// Builds the baseline configuration.
    ///
    /// The memory- and timeout-oriented cases are switched off, so they are
    /// only generated when a caller opts in explicitly.
    fn default() -> Self {
        // Opt-in switches for the heavier test groups.
        let include_memory_tests = false;
        let include_timeout_tests = false;

        Self {
            include_memory_tests,
            include_timeout_tests,
            low_confidence_threshold: 0.3,
            min_perspectives_threshold: 3,
            max_query_length: 10_000,
            max_recursion_depth: 100,
        }
    }
}

/// Generated edge case test case.
///
/// A plain data record describing one scenario: what kind of edge case it
/// is, the input that should provoke it, and what a correct implementation
/// is expected to do. The type is serde-serializable, which is what the
/// generator's JSON export/import relies on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeCaseTest {
    /// Type of edge case
    pub case_type: EdgeCaseType,
    /// Human-readable description
    pub description: String,
    /// Input context that triggers the edge case
    pub input: ThinkToolContext,
    /// Expected behavior (what should happen)
    pub expected_behavior: ExpectedBehavior,
    /// Risk level (how dangerous this edge case is)
    pub risk_level: RiskLevel,
    /// Module this test case is designed for.
    ///
    /// A free-form name; the generators in this file use "GigaThink",
    /// "LaserLogic", "BedRock", "BrutalHonesty", "ProofGuard", "SelfRefine",
    /// "Oscillation" and "Any".
    pub target_module: String,
}

/// Expected behavior when encountering the edge case.
///
/// This is a declarative expectation stored alongside each test case; the
/// placeholder runner in this file records it but does not evaluate it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ExpectedBehavior {
    /// Should return an error with specific message.
    ///
    /// `message_contains` is the substring the error text is expected to include.
    Error { message_contains: String },
    /// Should timeout after specified duration (in milliseconds).
    Timeout { max_duration_ms: u64 },
    /// Should succeed but with degraded performance.
    ///
    /// `acceptable_confidence` is the confidence value associated with the
    /// degraded result — presumably a lower bound; confirm against the
    /// consumer of these cases.
    DegradedPerformance { acceptable_confidence: f64 },
    /// Should detect and handle the circular logic
    CircularLogicDetected,
    /// Should prevent infinite recursion
    RecursionPrevented,
}

/// Risk level of the edge case.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so
/// `Low < Medium < High < Critical`. Filtering by a minimum risk level
/// depends on this ordering — do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum RiskLevel {
    /// Low risk - may cause minor performance issues
    Low,
    /// Medium risk - may cause significant slowdowns or memory usage
    Medium,
    /// High risk - may cause crashes or infinite loops
    High,
    /// Critical risk - may cause system instability or data corruption
    Critical,
}

/// Edge case generator for ThinkTool protocols.
///
/// Holds the generation settings together with the list of cases produced
/// so far. The list starts empty and is filled by the `generate_*` methods
/// or replaced wholesale by a JSON import.
///
/// `Debug` is derived so the generator can be inspected in logs and test
/// failure output; both field types already implement it.
#[derive(Debug, Default)]
pub struct EdgeCaseGenerator {
    /// Settings that control which cases are generated.
    config: EdgeCaseGeneratorConfig,
    /// Cases accumulated by the most recent generation or import.
    generated_cases: Vec<EdgeCaseTest>,
}

impl EdgeCaseGenerator {
    /// Create a new edge case generator with default configuration
    pub fn new() -> Self {
        Self::default()
    }

    /// Build a generator around a caller-supplied configuration.
    ///
    /// The case list starts out empty; nothing is produced until one of the
    /// generation methods is called.
    pub fn with_config(config: EdgeCaseGeneratorConfig) -> Self {
        let generated_cases = Vec::new();
        Self {
            generated_cases,
            config,
        }
    }

    /// Generate all edge case test cases
    pub fn generate_all_cases(&mut self) -> Result<&[EdgeCaseTest]> {
        self.generated_cases.clear();

        // Generate cases for each module
        self.generate_gigathink_cases()?;
        self.generate_laserlogic_cases()?;
        self.generate_bedrock_cases()?;
        self.generate_brutalhonesty_cases()?;
        self.generate_proofguard_cases()?;

        // Generate cross-protocol cases
        self.generate_protocol_edge_cases()?;

        Ok(&self.generated_cases)
    }

    /// Append the GigaThink-specific cases: a one-character query, a very
    /// long repeated query, and a query expected to yield few perspectives.
    fn generate_gigathink_cases(&mut self) -> Result<()> {
        let module = "GigaThink";

        // A single character gives the module nothing to analyse.
        let too_short = EdgeCaseTest {
            case_type: EdgeCaseType::QueryTooShort,
            description: String::from("Query too short for meaningful GigaThink analysis"),
            input: ThinkToolContext::new("X"),
            expected_behavior: ExpectedBehavior::Error {
                message_contains: String::from("Query too short"),
            },
            risk_level: RiskLevel::Low,
            target_module: module.to_string(),
        };

        // The same short question repeated 1000 times.
        let long_query = "What are the implications?".repeat(1000);
        let too_long = EdgeCaseTest {
            case_type: EdgeCaseType::QueryTooLong,
            description: String::from("Extremely long query that may cause processing issues"),
            input: ThinkToolContext::new(long_query),
            expected_behavior: ExpectedBehavior::DegradedPerformance {
                acceptable_confidence: 0.5,
            },
            risk_level: RiskLevel::Medium,
            target_module: module.to_string(),
        };

        // A closed question is used as a stand-in for "few diverse perspectives".
        let few_perspectives = EdgeCaseTest {
            case_type: EdgeCaseType::InsufficientPerspectives,
            description: String::from("Query that should generate very few diverse perspectives"),
            input: ThinkToolContext::new("Yes or no?"),
            expected_behavior: ExpectedBehavior::DegradedPerformance {
                acceptable_confidence: 0.3,
            },
            risk_level: RiskLevel::Low,
            target_module: module.to_string(),
        };

        self.generated_cases
            .extend(vec![too_short, too_long, few_perspectives]);
        Ok(())
    }

    /// Append the LaserLogic-specific cases: a circular argument and a
    /// deeply nested conditional argument (nesting depth 50).
    fn generate_laserlogic_cases(&mut self) -> Result<()> {
        let target = || "LaserLogic".to_string();

        // Argument whose conclusion merely restates its premise.
        let circular = EdgeCaseTest {
            case_type: EdgeCaseType::CircularReasoning,
            description: String::from(
                "Argument with circular reasoning (conclusion restates premise)",
            ),
            input: ThinkToolContext::new("The Bible is true because it is the word of God. Therefore, the Bible is the word of God."),
            expected_behavior: ExpectedBehavior::CircularLogicDetected,
            risk_level: RiskLevel::Low,
            target_module: target(),
        };
        self.generated_cases.push(circular);

        // Heavily nested structure intended to stress recursive parsing.
        let nested_argument = self.generate_deep_logical_structure(50);
        let deep_nesting = EdgeCaseTest {
            case_type: EdgeCaseType::StackOverflow,
            description: String::from("Deeply nested logical argument structure"),
            input: ThinkToolContext::new(nested_argument),
            expected_behavior: ExpectedBehavior::DegradedPerformance {
                acceptable_confidence: 0.6,
            },
            risk_level: RiskLevel::High,
            target_module: target(),
        };
        self.generated_cases.push(deep_nesting);

        Ok(())
    }

    /// Generate edge cases specific to BedRock module.
    ///
    /// Appends two cases:
    /// - a three-principle circular dependency, and
    /// - a long principle chain whose length is taken from
    ///   `config.max_recursion_depth`.
    ///
    /// The chain depth used to be a hard-coded 100, which is also the
    /// default of `max_recursion_depth`; with the default configuration the
    /// generated input is therefore unchanged, while a custom configuration
    /// now actually scales the chain.
    fn generate_bedrock_cases(&mut self) -> Result<()> {
        // Circular dependency in principle reconstruction
        self.generated_cases.push(EdgeCaseTest {
            case_type: EdgeCaseType::CircularDependency,
            description: "Principles with circular dependencies that could cause infinite loops"
                .to_string(),
            input: ThinkToolContext::new("A depends on B, B depends on C, C depends on A"),
            expected_behavior: ExpectedBehavior::RecursionPrevented,
            risk_level: RiskLevel::High,
            target_module: "BedRock".to_string(),
        });

        // Very deep principle chain, sized by the configured recursion depth
        // rather than a literal so the setting is honoured.
        let deep_chain = self.generate_deep_principle_chain(self.config.max_recursion_depth);
        self.generated_cases.push(EdgeCaseTest {
            case_type: EdgeCaseType::StackOverflow,
            description: "Extremely deep principle decomposition chain".to_string(),
            input: ThinkToolContext::new(deep_chain),
            expected_behavior: ExpectedBehavior::RecursionPrevented,
            risk_level: RiskLevel::Critical,
            target_module: "BedRock".to_string(),
        });

        Ok(())
    }

    /// Append the single BrutalHonesty-specific case: a self-contradictory
    /// claim.
    fn generate_brutalhonesty_cases(&mut self) -> Result<()> {
        // A liar-paradox style statement, which might make critique oscillate.
        let paradox = ThinkToolContext::new(
            "This statement is false, and I am completely honest about it.",
        );
        let expected = ExpectedBehavior::DegradedPerformance {
            acceptable_confidence: 0.4,
        };

        let contradiction_case = EdgeCaseTest {
            case_type: EdgeCaseType::CircularDependency,
            description: String::from(
                "Self-contradictory claim that challenges critique consistency",
            ),
            input: paradox,
            expected_behavior: expected,
            risk_level: RiskLevel::Medium,
            target_module: String::from("BrutalHonesty"),
        };
        self.generated_cases.push(contradiction_case);

        Ok(())
    }

    /// Append the single ProofGuard-specific case: a claim that asks for
    /// validation against an unbounded number of sources.
    fn generate_proofguard_cases(&mut self) -> Result<()> {
        // The claim itself demands an impossible amount of source checking.
        let unbounded_claim = ThinkToolContext::new(
            "This claim requires validation from every scientific paper ever written.",
        );
        let expected = ExpectedBehavior::DegradedPerformance {
            acceptable_confidence: 0.2,
        };

        let source_overload = EdgeCaseTest {
            case_type: EdgeCaseType::MemoryExhaustion,
            description: String::from("Claim requiring validation against 1000+ sources"),
            input: unbounded_claim,
            expected_behavior: expected,
            risk_level: RiskLevel::Medium,
            target_module: String::from("ProofGuard"),
        };
        self.generated_cases.push(source_overload);

        Ok(())
    }

    /// Append the cross-protocol cases.
    ///
    /// Two non-terminating-loop cases ("SelfRefine", then "Oscillation") are
    /// always added. A timeout case and a memory-exhaustion case follow, in
    /// that order, only when the matching configuration switch is enabled.
    fn generate_protocol_edge_cases(&mut self) -> Result<()> {
        // (description, query, target module) for the two always-on cases,
        // which share their type, expectation and risk level.
        let runaway_loops = [
            (
                "Self-refinement that oscillates without convergence",
                "Improve this answer forever without stopping.",
                "SelfRefine",
            ),
            (
                "Oscillation pattern that generates infinite divergent-convergent cycles",
                "Generate ideas that keep branching forever.",
                "Oscillation",
            ),
        ];
        for &(description, query, module) in runaway_loops.iter() {
            self.generated_cases.push(EdgeCaseTest {
                case_type: EdgeCaseType::InfiniteRecursion,
                description: description.to_string(),
                input: ThinkToolContext::new(query),
                expected_behavior: ExpectedBehavior::RecursionPrevented,
                risk_level: RiskLevel::Critical,
                target_module: module.to_string(),
            });
        }

        // Optional: a query meant to run far longer than is acceptable.
        if self.config.include_timeout_tests {
            let timeout_case = EdgeCaseTest {
                case_type: EdgeCaseType::ExecutionTimeout,
                description: String::from("Query designed to cause excessive processing time"),
                input: ThinkToolContext::new(
                    "Solve every unsolved mathematical problem in the world simultaneously.",
                ),
                expected_behavior: ExpectedBehavior::Timeout {
                    max_duration_ms: 30000,
                },
                risk_level: RiskLevel::High,
                target_module: String::from("Any"),
            };
            self.generated_cases.push(timeout_case);
        }

        // Optional: a large repeated prompt meant to exhaust memory.
        if self.config.include_memory_tests {
            let mut memory_hog = "Consider all possible combinations of: ".repeat(1000);
            memory_hog.push_str("every word in every language.");

            let memory_case = EdgeCaseTest {
                case_type: EdgeCaseType::MemoryExhaustion,
                description: String::from("Query designed to exhaust available memory"),
                input: ThinkToolContext::new(memory_hog),
                expected_behavior: ExpectedBehavior::Error {
                    message_contains: String::from("memory"),
                },
                risk_level: RiskLevel::Critical,
                target_module: String::from("Any"),
            };
            self.generated_cases.push(memory_case);
        }

        Ok(())
    }

    /// Generate a deeply nested logical structure.
    ///
    /// The innermost statement is `If A then B.`; each of the `depth` levels
    /// wraps the text so far as `If <inner> then C{i}, and C{i} implies D{i}.`
    /// and the result ends with ` Therefore, Z follows.`. With `depth == 0`
    /// the output is `If A then B. Therefore, Z follows.`.
    ///
    /// The string is assembled in one pass: every level contributes exactly
    /// one leading `"If "` and one trailing clause, so the prefixes and
    /// clauses are appended directly. This avoids re-copying the whole
    /// accumulated string on each level (quadratic in `depth`) while
    /// producing byte-identical output.
    fn generate_deep_logical_structure(&self, depth: usize) -> String {
        // One "If " per nesting level, outermost first.
        let mut structure = "If ".repeat(depth);
        structure.push_str("If A then B.");

        // Closing clauses run from the innermost level (0) outwards.
        for i in 0..depth {
            structure.push_str(&format!(" then C{}, and C{} implies D{}.", i, i, i));
        }

        structure.push_str(" Therefore, Z follows.");
        structure
    }

    /// Build a principle dependency chain of `depth` links that loops back
    /// on itself.
    ///
    /// Principle `i` depends on principle `i + 1` for every `i` in
    /// `0..depth`, and a final sentence makes principle `depth` depend on
    /// principle 0. Sentences are joined with `". "`.
    fn generate_deep_principle_chain(&self, depth: usize) -> String {
        // The last link closes the cycle back to the first principle.
        let closing_link = format!("Principle {} depends on principle 0", depth);

        (0..depth)
            .map(|index| format!("Principle {} depends on principle {}", index, index + 1))
            .chain(std::iter::once(closing_link))
            .collect::<Vec<String>>()
            .join(". ")
    }

    /// Borrow the cases currently held by the generator, in insertion order.
    pub fn test_cases(&self) -> &[EdgeCaseTest] {
        self.generated_cases.as_slice()
    }

    /// Collect references to every held case whose risk level is `min_risk`
    /// or higher, preserving their order.
    pub fn filter_by_risk_level(&self, min_risk: RiskLevel) -> Vec<&EdgeCaseTest> {
        let mut selected = Vec::new();
        for candidate in &self.generated_cases {
            if candidate.risk_level >= min_risk {
                selected.push(candidate);
            }
        }
        selected
    }

    /// Collect references to every held case of exactly the given type,
    /// preserving their order.
    pub fn filter_by_type(&self, case_type: EdgeCaseType) -> Vec<&EdgeCaseTest> {
        let wanted = case_type;
        self.generated_cases
            .iter()
            .filter_map(|candidate| {
                if candidate.case_type == wanted {
                    Some(candidate)
                } else {
                    None
                }
            })
            .collect()
    }

    /// Export test cases to JSON
    pub fn export_to_json(&self) -> Result<String> {
        serde_json::to_string_pretty(&self.generated_cases).map_err(Error::Json)
    }

    /// Import test cases from JSON
    pub fn import_from_json(&mut self, json: &str) -> Result<()> {
        self.generated_cases = serde_json::from_str(json).map_err(Error::Json)?;
        Ok(())
    }
}

/// Test runner for edge cases.
///
/// Owns an [`EdgeCaseGenerator`] and turns the cases it produces into an
/// [`EdgeCaseReport`].
pub struct EdgeCaseRunner {
    /// Generator that supplies the cases to run.
    generator: EdgeCaseGenerator,
}

impl EdgeCaseRunner {
    /// Create a new edge case runner
    pub fn new(config: EdgeCaseGeneratorConfig) -> Self {
        Self {
            generator: EdgeCaseGenerator::with_config(config),
        }
    }

    /// Run all generated edge case tests
    pub async fn run_all_tests(&mut self) -> Result<EdgeCaseReport> {
        self.generator.generate_all_cases()?;

        let mut report = EdgeCaseReport {
            total_tests: self.generator.test_cases().len(),
            passed: 0,
            failed: 0,
            skipped: 0,
            results: Vec::new(),
        };

        // Note: In a real implementation, this would execute each test case
        // against the actual ThinkTool modules. For now, we just report structure.

        for test_case in self.generator.test_cases() {
            // Simulate test execution (would be replaced with actual module calls)
            let result = EdgeCaseResult {
                test_case: test_case.clone(),
                success: true,          // Placeholder
                execution_time_ms: 100, // Placeholder
                error_message: None,
                actual_behavior: "Test executed successfully".to_string(),
            };

            if result.success {
                report.passed += 1;
            } else {
                report.failed += 1;
            }

            report.results.push(result);
        }

        Ok(report)
    }
}

/// Report from running edge case tests.
///
/// As produced by the runner in this file, `total_tests` equals the number
/// of generated cases and `results` holds one entry per case in generation
/// order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeCaseReport {
    /// Total number of tests run
    pub total_tests: usize,
    /// Number of tests that passed
    pub passed: usize,
    /// Number of tests that failed
    pub failed: usize,
    /// Number of tests that were skipped.
    ///
    /// NOTE(review): the runner visible in this file never skips a case, so
    /// it always reports 0 here.
    pub skipped: usize,
    /// Detailed results for each test
    pub results: Vec<EdgeCaseResult>,
}

/// Result of a single edge case test.
///
/// Carries its own copy of the test case, so a result remains meaningful
/// without access to the generator that produced the case.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeCaseResult {
    /// The test case that was executed
    pub test_case: EdgeCaseTest,
    /// Whether the test passed
    pub success: bool,
    /// Execution time in milliseconds
    pub execution_time_ms: u64,
    /// Error message if the test failed (`None` otherwise)
    pub error_message: Option<String>,
    /// Description of what actually happened
    pub actual_behavior: String,
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Distinct edge case types present in `cases`.
    fn distinct_types(cases: &[EdgeCaseTest]) -> HashSet<EdgeCaseType> {
        cases.iter().map(|case| case.case_type).collect()
    }

    #[test]
    fn test_edge_case_generator_creation() {
        // A freshly built generator has not produced anything yet.
        assert!(EdgeCaseGenerator::new().test_cases().is_empty());
    }

    #[test]
    fn test_generate_gigathink_cases() {
        let mut generator = EdgeCaseGenerator::new();
        generator.generate_gigathink_cases().unwrap();

        let produced = generator.test_cases();
        assert!(!produced.is_empty());

        // Too short, too long, and insufficient perspectives must all appear.
        let seen = distinct_types(produced);
        let expected = [
            EdgeCaseType::QueryTooShort,
            EdgeCaseType::QueryTooLong,
            EdgeCaseType::InsufficientPerspectives,
        ];
        for case_type in expected.iter() {
            assert!(seen.contains(case_type));
        }
    }

    #[test]
    fn test_generate_laserlogic_cases() {
        let mut generator = EdgeCaseGenerator::new();
        generator.generate_laserlogic_cases().unwrap();

        let produced = generator.test_cases();
        assert!(!produced.is_empty());

        let seen = distinct_types(produced);
        let expected = [
            EdgeCaseType::CircularReasoning,
            EdgeCaseType::StackOverflow,
        ];
        for case_type in expected.iter() {
            assert!(seen.contains(case_type));
        }
    }

    #[test]
    fn test_deep_structure_generation() {
        let nested = EdgeCaseGenerator::new().generate_deep_logical_structure(3);

        // The text must keep its if/then scaffolding and its conclusion.
        for fragment in ["If", "then", "Therefore"].iter() {
            assert!(nested.contains(fragment));
        }
    }

    #[test]
    fn test_filter_by_risk_level() {
        let mut generator = EdgeCaseGenerator::new();
        generator.generate_all_cases().unwrap();

        let at_least_high = generator.filter_by_risk_level(RiskLevel::High);
        assert!(!at_least_high.is_empty());

        // Nothing below the requested threshold may slip through.
        assert!(at_least_high
            .iter()
            .all(|case| case.risk_level >= RiskLevel::High));
    }

    #[test]
    fn test_json_export_import() {
        let mut exporter = EdgeCaseGenerator::new();
        exporter.generate_gigathink_cases().unwrap();

        let json = exporter.export_to_json().unwrap();
        assert!(!json.is_empty());

        let mut importer = EdgeCaseGenerator::new();
        importer.import_from_json(&json).unwrap();

        // The round trip must preserve the number of cases.
        let exported_count = exporter.test_cases().len();
        let imported_count = importer.test_cases().len();
        assert_eq!(exported_count, imported_count);
    }

    #[test]
    fn test_edge_case_runner() {
        let config = EdgeCaseGeneratorConfig {
            include_memory_tests: false,
            include_timeout_tests: false,
            ..Default::default()
        };
        let mut runner = EdgeCaseRunner::new(config);

        // Execution is simulated, so only the report's bookkeeping is checked.
        let report = futures::executor::block_on(runner.run_all_tests()).unwrap();
        let accounted_for = report.passed + report.failed + report.skipped;
        assert_eq!(report.total_tests, accounted_for);
    }
}