// uncertain_rs/hypothesis.rs
1#![allow(clippy::cast_precision_loss)]
2
3use crate::Uncertain;
4
/// Result of hypothesis testing
///
/// Returned by [`Uncertain::evaluate_hypothesis`]; bundles the accept/reject
/// decision together with the statistics that produced it.
#[derive(Debug, Clone)]
pub struct HypothesisResult {
    /// Whether the hypothesis was accepted (true) or rejected (false)
    pub decision: bool,
    /// Observed probability from samples
    /// (the proportion of `true` samples drawn during the test)
    pub probability: f64,
    /// Confidence level used in the test
    pub confidence_level: f64,
    /// Number of samples used to make the decision
    pub samples_used: usize,
}
17
18/// Evidence-based conditional methods for uncertain boolean values
19impl Uncertain<bool> {
20 /// Evidence-based conditional using hypothesis testing
21 ///
22 /// This is the core method that implements the paper's key insight:
23 /// ask about evidence, not boolean facts.
24 ///
25 /// # Arguments
26 /// * `threshold` - Probability threshold to exceed
27 /// * `confidence_level` - Confidence level for the test (default: 0.95)
28 /// * `max_samples` - Maximum number of samples to use (default: 10000)
29 ///
30 /// # Example
31 /// ```rust
32 /// use uncertain_rs::{Uncertain, operations::Comparison};
33 ///
34 /// let speed = Uncertain::normal(55.0, 5.0);
35 /// let speeding_evidence = Comparison::gt(&speed, 60.0);
36 ///
37 /// // Only issue ticket if 95% confident speeding
38 /// if speeding_evidence.probability_exceeds(0.95) {
39 /// println!("Issue speeding ticket");
40 /// }
41 /// ```
42 #[must_use]
43 pub fn probability_exceeds(&self, threshold: f64) -> bool {
44 self.probability_exceeds_with_params(threshold, 0.95, 10000)
45 }
46
47 /// Evidence-based conditional with configurable parameters
48 ///
49 /// # Example
50 /// ```rust
51 /// use uncertain_rs::Uncertain;
52 ///
53 /// let condition = Uncertain::bernoulli(0.7);
54 /// let confident = condition.probability_exceeds_with_params(0.6, 0.99, 5000);
55 /// ```
56 #[must_use]
57 pub fn probability_exceeds_with_params(
58 &self,
59 threshold: f64,
60 confidence_level: f64,
61 max_samples: usize,
62 ) -> bool {
63 let result = self.evaluate_hypothesis(
64 threshold,
65 confidence_level,
66 max_samples,
67 None, // epsilon
68 None, // alpha
69 None, // beta
70 10, // batch_size
71 );
72 result.decision
73 }
74
75 /// Implicit conditional (equivalent to `probability_exceeds(0.5)`)
76 ///
77 /// This provides a convenient way to use uncertain booleans in if statements
78 /// while still respecting the uncertainty.
79 ///
80 /// # Example
81 /// ```rust
82 /// use uncertain_rs::{Uncertain, operations::Comparison};
83 ///
84 /// let measurement = Uncertain::normal(10.0, 2.0);
85 /// let above_threshold = Comparison::gt(&measurement, 8.0);
86 ///
87 /// if above_threshold.implicit_conditional() {
88 /// println!("More likely than not above threshold");
89 /// }
90 /// ```
91 #[must_use]
92 pub fn implicit_conditional(&self) -> bool {
93 self.probability_exceeds(0.5)
94 }
95
96 /// Performs Sequential Probability Ratio Test (SPRT) for efficient hypothesis testing
97 ///
98 /// This implements the SPRT algorithm described in the paper for efficient
99 /// evidence evaluation with automatic sample size determination.
100 ///
101 /// # Arguments
102 /// * `threshold` - Probability threshold for H1: P(true) > threshold
103 /// * `confidence_level` - Overall confidence level
104 /// * `max_samples` - Maximum samples before fallback decision
105 /// * `epsilon` - Indifference region size (default: 0.05)
106 /// * `alpha` - Type I error rate (false positive, default: 1 - `confidence_level`)
107 /// * `beta` - Type II error rate (false negative, default: alpha)
108 /// * `batch_size` - Samples to process in each batch (default: 10)
109 ///
110 /// # Returns
111 /// `HypothesisResult` containing the decision and test statistics
112 ///
113 /// # Example
114 /// ```rust
115 /// use uncertain_rs::Uncertain;
116 ///
117 /// let biased_coin = Uncertain::bernoulli(0.7);
118 /// let result = biased_coin.evaluate_hypothesis(
119 /// 0.6, // threshold
120 /// 0.95, // confidence_level
121 /// 5000, // max_samples
122 /// Some(0.05), // epsilon
123 /// Some(0.01), // alpha (Type I error)
124 /// Some(0.01), // beta (Type II error)
125 /// 20, // batch_size
126 /// );
127 ///
128 /// println!("Decision: {}", result.decision);
129 /// println!("Probability: {:.3}", result.probability);
130 /// println!("Samples used: {}", result.samples_used);
131 /// ```
132 #[must_use]
133 pub fn evaluate_hypothesis(
134 &self,
135 threshold: f64,
136 confidence_level: f64,
137 max_samples: usize,
138 epsilon: Option<f64>,
139 alpha: Option<f64>,
140 beta: Option<f64>,
141 batch_size: usize,
142 ) -> HypothesisResult {
143 let epsilon = epsilon.unwrap_or(0.05);
144 let alpha_error = alpha.unwrap_or(1.0 - confidence_level);
145 let beta_error = beta.unwrap_or(alpha_error);
146
147 // Calculate SPRT boundaries
148 let a = (beta_error / (1.0 - alpha_error)).ln();
149 let b = ((1.0 - beta_error) / alpha_error).ln();
150
151 // Set indifference region (avoiding edge cases)
152 let p0 = (threshold - epsilon).clamp(0.001, 0.999);
153 let p1 = (threshold + epsilon).clamp(0.001, 0.999);
154
155 let mut successes = 0;
156 let mut samples = 0;
157
158 while samples < max_samples {
159 // Batch sampling for efficiency
160 let current_batch_size = (max_samples - samples).min(batch_size);
161 let mut batch_successes = 0;
162
163 for _ in 0..current_batch_size {
164 if self.sample() {
165 batch_successes += 1;
166 }
167 }
168
169 successes += batch_successes;
170 samples += current_batch_size;
171
172 // Compute log-likelihood ratio (LLR)
173 let n = samples as f64;
174 let x = f64::from(successes);
175
176 // Avoid log(0) by clamping probabilities
177 let p0_clamped = p0.clamp(1e-10, 1.0 - 1e-10);
178 let p1_clamped = p1.clamp(1e-10, 1.0 - 1e-10);
179
180 let llr = x * (p1_clamped / p0_clamped).ln()
181 + (n - x) * ((1.0 - p1_clamped) / (1.0 - p0_clamped)).ln();
182
183 if llr <= a {
184 // Accept H0: P(true) <= threshold
185 return HypothesisResult {
186 decision: false,
187 probability: f64::from(successes) / samples as f64,
188 confidence_level,
189 samples_used: samples,
190 };
191 } else if llr >= b {
192 // Accept H1: P(true) > threshold
193 return HypothesisResult {
194 decision: true,
195 probability: f64::from(successes) / samples as f64,
196 confidence_level,
197 samples_used: samples,
198 };
199 }
200 }
201
202 // Fallback decision based on observed probability
203 let final_p = f64::from(successes) / samples as f64;
204 HypothesisResult {
205 decision: final_p > threshold,
206 probability: final_p,
207 confidence_level,
208 samples_used: samples,
209 }
210 }
211
212 /// Estimates the probability that this condition is true
213 ///
214 /// # Example
215 /// ```rust
216 /// use uncertain_rs::Uncertain;
217 ///
218 /// let condition = Uncertain::bernoulli(0.7);
219 /// let prob = condition.estimate_probability(1000);
220 /// // Should be approximately 0.7
221 /// ```
222 #[must_use]
223 pub fn estimate_probability(&self, sample_count: usize) -> f64 {
224 let samples: Vec<bool> = self.take_samples(sample_count);
225 if samples.is_empty() {
226 0.0
227 } else {
228 samples.iter().filter(|&&x| x).count() as f64 / samples.len() as f64
229 }
230 }
231
232 /// Bayesian evidence update using Bayes' theorem
233 ///
234 /// Updates the probability of this condition given observed evidence.
235 ///
236 /// # Arguments
237 /// * `prior_prob` - Prior probability of the condition
238 /// * `evidence` - Observed evidence (another uncertain boolean)
239 /// * `likelihood_given_true` - P(evidence | condition is true)
240 /// * `likelihood_given_false` - P(evidence | condition is false)
241 /// * `sample_count` - Number of samples for estimation
242 ///
243 /// # Example
244 /// ```rust
245 /// use uncertain_rs::Uncertain;
246 ///
247 /// let disease = Uncertain::bernoulli(0.01); // 1% base rate
248 /// let test_positive = Uncertain::bernoulli(0.95); // Test result
249 ///
250 /// let posterior = Uncertain::bayesian_update(
251 /// 0.01, // prior probability
252 /// &test_positive,
253 /// 0.95, // sensitivity (true positive rate)
254 /// 0.05, // false positive rate
255 /// 1000
256 /// );
257 /// ```
258 #[must_use]
259 pub fn bayesian_update(
260 prior_prob: f64,
261 evidence: &Uncertain<bool>,
262 likelihood_given_true: f64,
263 likelihood_given_false: f64,
264 sample_count: usize,
265 ) -> f64 {
266 let evidence_prob = evidence.estimate_probability(sample_count);
267
268 // Bayes' theorem: P(H|E) = P(E|H) * P(H) / P(E)
269 // P(E) = P(E|H) * P(H) + P(E|¬H) * P(¬H)
270 let evidence_total =
271 likelihood_given_true * prior_prob + likelihood_given_false * (1.0 - prior_prob);
272
273 if evidence_total == 0.0 {
274 return prior_prob;
275 }
276
277 if evidence_prob > 0.5 {
278 // Evidence is present
279 (likelihood_given_true * prior_prob) / evidence_total
280 } else {
281 // Evidence is absent
282 ((1.0 - likelihood_given_true) * prior_prob) / (1.0 - evidence_total)
283 }
284 }
285}
286
/// Sequential testing for multiple hypotheses
///
/// Pairs a set of uncertain boolean hypotheses with human-readable names so
/// they can be tested as a family (with multiple-comparison correction).
pub struct MultipleHypothesisTester {
    // Hypotheses under test; index-aligned with `names`.
    hypotheses: Vec<Uncertain<bool>>,
    // Display name for each hypothesis; index-aligned with `hypotheses`.
    names: Vec<String>,
}
292
293impl MultipleHypothesisTester {
294 /// Create a new multiple hypothesis tester
295 ///
296 /// # Example
297 /// ```rust
298 /// use uncertain_rs::{Uncertain, hypothesis::MultipleHypothesisTester, operations::Comparison};
299 ///
300 /// let temp = Uncertain::normal(22.0, 2.0);
301 /// let hypotheses = vec![
302 /// Comparison::gt(&temp, 20.0),
303 /// Comparison::gt(&temp, 25.0),
304 /// Comparison::lt(&temp, 18.0),
305 /// ];
306 /// let names = vec!["warm", "hot", "cold"];
307 ///
308 /// let tester = MultipleHypothesisTester::new(hypotheses, names);
309 /// ```
310 #[must_use]
311 pub fn new(hypotheses: Vec<Uncertain<bool>>, names: Vec<&str>) -> Self {
312 Self {
313 hypotheses,
314 names: names
315 .into_iter()
316 .map(std::string::ToString::to_string)
317 .collect(),
318 }
319 }
320
321 /// Test all hypotheses and return results
322 ///
323 /// Uses Bonferroni correction to control family-wise error rate.
324 #[must_use]
325 pub fn test_all(
326 &self,
327 overall_alpha: f64,
328 max_samples: usize,
329 ) -> Vec<(String, HypothesisResult)> {
330 let corrected_alpha = if self.hypotheses.is_empty() {
331 overall_alpha
332 } else {
333 overall_alpha / self.hypotheses.len() as f64
334 };
335 let confidence_level = 1.0 - corrected_alpha;
336
337 self.hypotheses
338 .iter()
339 .zip(self.names.iter())
340 .map(|(hypothesis, name)| {
341 let result = hypothesis.evaluate_hypothesis(
342 0.5,
343 confidence_level,
344 max_samples,
345 None,
346 Some(corrected_alpha),
347 None,
348 10,
349 );
350 (name.clone(), result)
351 })
352 .collect()
353 }
354
355 /// Find the hypothesis with the highest probability
356 #[must_use]
357 pub fn find_most_likely(&self, sample_count: usize) -> Option<(String, f64)> {
358 let mut best_name = None;
359 let mut best_prob = 0.0;
360
361 for (hypothesis, name) in self.hypotheses.iter().zip(self.names.iter()) {
362 let prob = hypothesis.estimate_probability(sample_count);
363 if prob > best_prob {
364 best_prob = prob;
365 best_name = Some(name.clone());
366 }
367 }
368
369 best_name.map(|name| (name, best_prob))
370 }
371}
372
#[cfg(test)]
mod tests {
    use super::*;
    use crate::operations::Comparison;

    // Point distributions are deterministic, so these decisions are exact.
    #[test]
    fn test_probability_exceeds() {
        let always_true = Uncertain::point(true);
        let always_false = Uncertain::point(false);

        assert!(always_true.probability_exceeds(0.5));
        assert!(always_true.probability_exceeds(0.95));
        assert!(!always_false.probability_exceeds(0.05));
        assert!(!always_false.probability_exceeds(0.95));
    }

    #[test]
    fn test_implicit_conditional() {
        let likely_true = Uncertain::bernoulli(0.8);
        let likely_false = Uncertain::bernoulli(0.2);

        // These are probabilistic, so we test multiple times
        let mut true_count = 0;
        let mut false_count = 0;

        for _ in 0..10 {
            if likely_true.implicit_conditional() {
                true_count += 1;
            }
            if likely_false.implicit_conditional() {
                false_count += 1;
            }
        }

        // Should be mostly true and mostly false respectively
        assert!(true_count > false_count);
    }

    #[test]
    fn test_hypothesis_testing_with_known_probability() {
        let biased_coin = Uncertain::bernoulli(0.7);

        // Test exceeds 0.6 (should be true)
        let result1 = biased_coin.evaluate_hypothesis(0.6, 0.95, 5000, None, None, None, 20);
        assert!(result1.decision);
        // Observed probability should land near the true 0.7 rate.
        assert!((result1.probability - 0.7).abs() < 0.2);

        // Test exceeds 0.8 (should be false)
        let result2 = biased_coin.evaluate_hypothesis(0.8, 0.95, 5000, None, None, None, 20);
        assert!(!result2.decision);
    }

    #[test]
    fn test_sprt_efficiency() {
        // Clear cases should be decided quickly
        let certain_true = Uncertain::point(true);
        let result = certain_true.evaluate_hypothesis(0.5, 0.95, 10000, None, None, None, 10);

        assert!(result.decision);
        assert!(result.samples_used < 100); // Should decide quickly
    }

    #[test]
    fn test_evidence_based_conditionals() {
        let speed = Uncertain::normal(55.0, 5.0);
        let speeding_evidence = Comparison::gt(&speed, 60.0);

        // With mean=55, std=5, P(X > 60) should be relatively low
        let high_confidence = speeding_evidence.probability_exceeds(0.95);

        // These tests are probabilistic but should generally hold
        assert!(!high_confidence); // Very unlikely to be 95% confident
    }

    #[test]
    fn test_bayesian_update() {
        let test_positive = Uncertain::bernoulli(0.95);

        let posterior = Uncertain::bayesian_update(
            0.01, // prior: 1% disease rate
            &test_positive,
            0.95, // sensitivity
            0.05, // false positive rate
            1000,
        );

        // Posterior should be higher than prior given positive test
        assert!(posterior > 0.01);
    }

    #[test]
    fn test_multiple_hypothesis_testing() {
        let temp = Uncertain::normal(22.0, 2.0);
        let hypotheses = vec![
            Comparison::gt(&temp, 20.0), // Should be likely true
            Comparison::gt(&temp, 30.0), // Should be false
            Comparison::lt(&temp, 15.0), // Should be false
        ];
        let names = vec!["warm", "hot", "cold"];

        let tester = MultipleHypothesisTester::new(hypotheses, names);
        let results = tester.test_all(0.05, 1000);

        assert_eq!(results.len(), 3);

        // First hypothesis (warm) should likely be true
        assert_eq!(results[0].0, "warm");
        // Can't guarantee the decision due to randomness, but we can check structure
        assert!(results[0].1.samples_used > 0);
    }

    #[test]
    fn test_find_most_likely() {
        let temp = Uncertain::normal(22.0, 1.0);
        let hypotheses = vec![
            Comparison::gt(&temp, 25.0), // Unlikely
            Comparison::gt(&temp, 20.0), // Very likely
            Comparison::lt(&temp, 18.0), // Unlikely
        ];
        let names = vec!["hot", "warm", "cold"];

        let tester = MultipleHypothesisTester::new(hypotheses, names);
        let most_likely = tester.find_most_likely(1000);

        // Should find "warm" as most likely
        assert!(most_likely.is_some());
        let (name, prob) = most_likely.unwrap();
        assert_eq!(name, "warm");
        assert!(prob > 0.8); // Should have high probability
    }
}