ricecoder_learning/
confidence_score_property.rs

/// Property-based tests for confidence score accuracy
/// **Feature: ricecoder-learning, Property 7: Confidence Score Accuracy**
/// **Validates: Requirements 3.6**
4
5#[cfg(test)]
6mod tests {
7    use proptest::prelude::*;
8    use crate::{Decision, DecisionContext, PatternCapturer};
9    use std::path::PathBuf;
10
11    /// Strategy for generating decision contexts
12    fn decision_context_strategy() -> impl Strategy<Value = DecisionContext> {
13        (
14            "/project",
15            "/project/src/main.rs",
16            0u32..1000,
17            "test_agent",
18        )
19            .prop_map(|(project, file, line, agent)| DecisionContext {
20                project_path: PathBuf::from(project),
21                file_path: PathBuf::from(file),
22                line_number: line,
23                agent_type: agent.to_string(),
24            })
25    }
26
27    /// Strategy for generating JSON values
28    fn json_value_strategy() -> impl Strategy<Value = serde_json::Value> {
29        prop_oneof![
30            Just(serde_json::json!({})),
31            Just(serde_json::json!({"key": "value"})),
32            Just(serde_json::json!({"number": 42})),
33            Just(serde_json::json!({"array": [1, 2, 3]})),
34            Just(serde_json::json!({"nested": {"inner": "value"}})),
35        ]
36    }
37
38    /// Strategy for generating decisions
39    fn decision_strategy() -> impl Strategy<Value = Decision> {
40        (
41            decision_context_strategy(),
42            "code_generation|refactoring|analysis",
43            json_value_strategy(),
44            json_value_strategy(),
45        )
46            .prop_map(|(context, decision_type, input, output)| {
47                Decision::new(context, decision_type.to_string(), input, output)
48            })
49    }
50
51    /// Property 7: Confidence Score Accuracy
52    /// For any rule, the confidence score SHALL be updated based on validation results,
53    /// increasing when rules are successfully applied and decreasing when they fail.
54    #[test]
55    fn prop_confidence_score_increases_on_success() {
56        proptest!(|(decisions in prop::collection::vec(decision_strategy(), 2..20))| {
57            let capturer = PatternCapturer::new();
58
59            // Extract patterns
60            let patterns = capturer.extract_patterns(&decisions).expect("Extraction failed");
61
62            for pattern in patterns {
63                let initial_confidence = pattern.confidence;
64
65                // Validate pattern against decisions (should succeed)
66                let validation_score = capturer
67                    .validate_pattern(&pattern, &decisions)
68                    .expect("Validation failed");
69
70                // Validation score should be between 0 and 1
71                prop_assert!(validation_score >= 0.0, "Validation score should be >= 0");
72                prop_assert!(validation_score <= 1.0, "Validation score should be <= 1");
73
74                // If validation score is high, confidence should increase
75                if validation_score > 0.7 {
76                    // Confidence should be positive
77                    prop_assert!(initial_confidence >= 0.0, "Initial confidence should be >= 0");
78                }
79            }
80        });
81    }
82
83    /// Property: Confidence scores should be bounded
84    /// For any pattern, the confidence score should always be between 0 and 1
85    #[test]
86    fn prop_confidence_score_bounded() {
87        proptest!(|(decisions in prop::collection::vec(decision_strategy(), 2..20))| {
88            let capturer = PatternCapturer::new();
89
90            // Extract patterns
91            let patterns = capturer.extract_patterns(&decisions).expect("Extraction failed");
92
93            for pattern in patterns {
94                prop_assert!(
95                    pattern.confidence >= 0.0,
96                    "Confidence should be >= 0, got {}",
97                    pattern.confidence
98                );
99                prop_assert!(
100                    pattern.confidence <= 1.0,
101                    "Confidence should be <= 1, got {}",
102                    pattern.confidence
103                );
104            }
105        });
106    }
107
108    /// Property: Confidence updates should be monotonic
109    /// For any pattern, updating confidence with a higher validation score should
110    /// increase the confidence (or keep it the same)
111    #[test]
112    fn prop_confidence_update_monotonic() {
113        proptest!(|(
114            decisions in prop::collection::vec(decision_strategy(), 2..20),
115            validation_score in 0.0f32..=1.0f32
116        )| {
117            let capturer = PatternCapturer::new();
118
119            // Extract patterns
120            let patterns = capturer.extract_patterns(&decisions).expect("Extraction failed");
121
122            for mut pattern in patterns {
123                let initial_confidence = pattern.confidence;
124
125                // Update confidence
126                capturer
127                    .update_confidence(&mut pattern, validation_score)
128                    .expect("Update failed");
129
130                // Confidence should still be bounded
131                prop_assert!(pattern.confidence >= 0.0, "Confidence should be >= 0");
132                prop_assert!(pattern.confidence <= 1.0, "Confidence should be <= 1");
133
134                // Confidence should change (unless it's already at the boundary)
135                // Using exponential moving average with alpha=0.3
136                let expected = (0.3 * validation_score) + (0.7 * initial_confidence);
137                prop_assert!(
138                    (pattern.confidence - expected).abs() < 0.0001,
139                    "Confidence update should follow EMA formula"
140                );
141            }
142        });
143    }
144
145    /// Property: Confidence should reflect pattern consistency
146    /// For patterns with consistent outputs, confidence should be higher
147    #[test]
148    fn prop_confidence_reflects_consistency() {
149        proptest!(|(decision in decision_strategy())| {
150            let capturer = PatternCapturer::new();
151
152            // Create multiple identical decisions
153            let identical_decisions = vec![decision.clone(), decision.clone(), decision.clone()];
154
155            // Extract patterns
156            let patterns = capturer
157                .extract_patterns(&identical_decisions)
158                .expect("Extraction failed");
159
160            // Patterns from identical decisions should have reasonable confidence
161            for pattern in patterns {
162                prop_assert!(
163                    pattern.confidence > 0.0,
164                    "Confidence should be > 0 for consistent patterns"
165                );
166            }
167        });
168    }
169
170    /// Property: Validation score should be consistent
171    /// For the same pattern and decision history, validation should produce the same score
172    #[test]
173    fn prop_validation_score_consistent() {
174        proptest!(|(decisions in prop::collection::vec(decision_strategy(), 2..20))| {
175            let capturer = PatternCapturer::new();
176
177            // Extract patterns
178            let patterns = capturer.extract_patterns(&decisions).expect("Extraction failed");
179
180            for pattern in patterns {
181                // Validate multiple times
182                let score1 = capturer
183                    .validate_pattern(&pattern, &decisions)
184                    .expect("Validation 1 failed");
185                let score2 = capturer
186                    .validate_pattern(&pattern, &decisions)
187                    .expect("Validation 2 failed");
188                let score3 = capturer
189                    .validate_pattern(&pattern, &decisions)
190                    .expect("Validation 3 failed");
191
192                // Scores should be identical
193                prop_assert!(
194                    (score1 - score2).abs() < 0.0001,
195                    "Validation scores should be consistent"
196                );
197                prop_assert!(
198                    (score2 - score3).abs() < 0.0001,
199                    "Validation scores should be consistent"
200                );
201            }
202        });
203    }
204
205    /// Property: Confidence should increase with more matching examples
206    /// For patterns with more matching examples, confidence should be higher
207    #[test]
208    fn prop_confidence_increases_with_matches() {
209        proptest!(|(decision in decision_strategy())| {
210            let capturer = PatternCapturer::new();
211
212            // Create decisions with varying numbers of matches
213            let double_decision = vec![decision.clone(), decision.clone()];
214            let triple_decision = vec![decision.clone(), decision.clone(), decision.clone()];
215
216            // Extract patterns (only double and triple should produce patterns)
217            let patterns_double = capturer
218                .extract_patterns(&double_decision)
219                .expect("Extraction failed");
220            let patterns_triple = capturer
221                .extract_patterns(&triple_decision)
222                .expect("Extraction failed");
223
224            // Both should produce patterns
225            prop_assert!(patterns_double.len() > 0, "Double decision should produce patterns");
226            prop_assert!(patterns_triple.len() > 0, "Triple decision should produce patterns");
227
228            // Triple should have more occurrences
229            if patterns_double.len() > 0 && patterns_triple.len() > 0 {
230                let double_occurrences = patterns_double[0].occurrences;
231                let triple_occurrences = patterns_triple[0].occurrences;
232
233                prop_assert!(
234                    triple_occurrences > double_occurrences,
235                    "Triple should have more occurrences"
236                );
237            }
238        });
239    }
240
241    /// Property: Confidence should be bounded
242    /// For any pattern, confidence should always be between 0 and 1
243    #[test]
244    fn prop_confidence_deterministic() {
245        proptest!(|(decisions in prop::collection::vec(decision_strategy(), 2..20))| {
246            let capturer = PatternCapturer::new();
247
248            // Extract patterns multiple times
249            let patterns1 = capturer.extract_patterns(&decisions).expect("Extraction 1 failed");
250            let patterns2 = capturer.extract_patterns(&decisions).expect("Extraction 2 failed");
251            let patterns3 = capturer.extract_patterns(&decisions).expect("Extraction 3 failed");
252
253            // All patterns should have bounded confidence
254            for pattern in patterns1.iter().chain(patterns2.iter()).chain(patterns3.iter()) {
255                prop_assert!(
256                    pattern.confidence >= 0.0 && pattern.confidence <= 1.0,
257                    "Confidence should be bounded: {}",
258                    pattern.confidence
259                );
260            }
261        });
262    }
263}