1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
use rand::distributions::Distribution as RandDistribution;
use statrs::{
distribution::{Continuous, ContinuousCDF},
statistics::{Distribution, Max, Median, Min, Mode},
};
use crate::{Error, EstimationFitQuality, Result};
/// A trait for probability distributions used in estimation scenarios.
///
/// This trait combines several statistical traits and provides additional methods
/// specific to estimation and risk analysis. It's designed to be implemented by
/// distributions that model uncertainty in estimates, such as project durations,
/// costs, or other quantifiable outcomes.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.expected_value(), 2.0, epsilon = 1e-6);
/// assert_relative_eq!(pert.optimistic_estimate(), 1.37847, epsilon = 1e-4);
/// assert_relative_eq!(pert.pessimistic_estimate(), 2.62152, epsilon = 1e-4);
/// ```
pub trait EstimationDistribution:
Distribution<f64>
+ Continuous<f64, f64>
+ ContinuousCDF<f64, f64>
+ Median<f64>
+ Mode<f64>
+ Min<f64>
+ Max<f64>
+ RandDistribution<f64> // TODO: add TryFrom (x2) here
{
/// Returns the estimate at a given percentile.
///
/// # Arguments
///
/// * `p` - The percentile as a float between 0.0 and 1.0.
///
/// # Returns
///
/// The estimated value at the given percentile, or an error if the percentile is invalid.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.percentile_estimate(0.5).unwrap(), 2.0, epsilon = 1e-4);
/// ```
fn percentile_estimate(&self, p: f64) -> Result<f64> {
if !(0.0..=1.0).contains(&p) {
Err(Error::InvalidPercentile(p))
} else {
Ok(self.inverse_cdf(p))
}
}
/// Returns the optimistic (best-case) estimate, typically the 5th percentile.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.optimistic_estimate(), 1.37847, epsilon = 1e-4);
/// ```
fn optimistic_estimate(&self) -> f64 {
self.percentile_estimate(0.05).unwrap()
}
/// Returns the pessimistic (worst-case) estimate, typically the 95th percentile.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.pessimistic_estimate(), 2.62152, epsilon = 1e-4);
/// ```
fn pessimistic_estimate(&self) -> f64 {
self.percentile_estimate(0.95).unwrap()
}
/// Returns the most likely estimate (mode).
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_eq!(pert.most_likely_estimate(), 2.0);
/// ```
fn most_likely_estimate(&self) -> f64 {
self.mode()
}
/// Returns the expected value (mean) of the distribution.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.expected_value(), 2.0, epsilon = 1e-6);
/// ```
fn expected_value(&self) -> f64 {
self.mean().unwrap()
}
/// Returns the standard deviation of the distribution.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.uncertainty(), 0.377964, epsilon = 1e-4);
/// ```
fn uncertainty(&self) -> f64 {
self.std_dev().unwrap()
}
/// Calculates the probability of completing a task or project by a given estimate.
///
/// This method computes the probability that the actual value will be less than or equal to
/// the provided estimate. It's particularly useful in project management and risk analysis
/// to quantify the likelihood of meeting deadlines or budget targets.
///
/// The probability of completion is equivalent to the cumulative distribution function (CDF)
/// evaluated at the given estimate.
///
/// # Arguments
///
/// * `estimate` - The point estimate to evaluate the probability against.
///
/// # Returns
///
/// A float between 0 and 1 representing the probability of completing by the given estimate.
///
/// # Examples
///
/// ```
/// use distimate::prelude::*;
/// use distimate::Pert;
/// use approx::assert_relative_eq;
///
/// let pert = Pert::new(1.0, 2.0, 3.0).unwrap();
/// assert_relative_eq!(pert.probability_of_completion(2.5), 0.8436, epsilon = 1e-4);
/// ```
fn probability_of_completion(&self, estimate: f64) -> f64 {
self.cdf(estimate)
}
/// Calculates the risk of exceeding a given estimate.
///
/// This method computes the probability that the actual value will be greater than
/// the provided estimate. It's particularly useful in project management and risk
/// analysis to quantify the likelihood of cost overruns or schedule delays.
///
/// The risk of overrun is calculated as 1 minus the probability of completion:
/// Risk = 1 - P(X <= estimate), where X is the random variable representing the
/// actual cost or time.
///
/// For example:
/// - A risk of 0.2 (20%) means there's a 20% chance the project will exceed the estimate.
/// - A risk of 0.5 (50%) means it's equally likely to be above or below the estimate.
/// - A risk of 0.8 (80%) suggests a high likelihood of exceeding the estimate.
///
/// # Arguments
///
/// * `estimate` - The point estimate to evaluate the risk against.
///
/// # Returns
///
/// A float between 0 and 1 representing the probability of exceeding the estimate.
fn risk_of_overrun(&self, estimate: f64) -> f64 {
1.0 - self.probability_of_completion(estimate)
}
/// Returns a confidence interval for the estimate.
///
/// A confidence interval provides a range of values that is likely to contain
/// the true value with a certain level of confidence. It's useful for expressing
/// the uncertainty in an estimate.
///
/// This method calculates a two-sided confidence interval, meaning it provides
/// both a lower and upper bound. The interval is centered around the median of
/// the distribution.
///
/// # Arguments
///
/// * `confidence_level` - A value between 0 and 1 representing the desired level of confidence.
/// Common values are 0.95 for a 95% confidence interval or 0.99 for a 99% confidence interval.
///
/// # Returns
///
/// A tuple (lower, upper) representing the lower and upper bounds of the confidence interval.
///
/// # Example
///
/// If you call `confidence_interval(0.95)` and get (100, 150), it means:
/// "We are 95% confident that the true value lies between 100 and 150."
///
/// # Note
///
/// The width of the interval indicates the precision of the estimate. A narrower interval
/// suggests a more precise estimate, while a wider interval indicates more uncertainty.
fn confidence_interval(&self, confidence_level: f64) -> (f64, f64) {
let lower = self.percentile_estimate((1.0 - confidence_level) / 2.0);
let upper = self.percentile_estimate(1.0 - (1.0 - confidence_level) / 2.0);
(lower.unwrap(), upper.unwrap())
}
/// Evaluates how well the distribution fits a given dataset in the context of estimation.
///
/// This method provides a measure of how accurately the distribution represents
/// the provided set of estimates or actual values.
///
/// # Arguments
///
/// * `data` - A slice of f64 values representing historical estimates or actual values.
///
/// # Returns
///
/// Returns an `EstimationFitQuality` struct containing various fit metrics.
///
/// # Errors
///
/// Returns an `Error` if there's an issue calculating the fit,
/// such as invalid data points or errors in percentile calculations.
fn evaluate_fit_quality(&self, data: &[f64]) -> Result<EstimationFitQuality> {
let n = data.len();
if n == 0 {
return Err(Error::InsufficientData {
required: 1,
provided: 0,
});
}
let mean = data.iter().sum::<f64>() / n as f64;
let median = self.median();
let mode = self.mode();
let mse = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n as f64;
let rmse = mse.sqrt();
let mape = data.iter().map(|&x| ((x - mean) / x).abs()).sum::<f64>() / n as f64 * 100.0;
let within_50 = self.calculate_within_interval(data, 0.25, 0.75)?;
let within_68 = self.calculate_within_interval(data, 0.16, 0.84)?;
let within_95 = self.calculate_within_interval(data, 0.025, 0.975)?;
Ok(EstimationFitQuality {
mean_absolute_percentage_error: mape,
root_mean_square_error: rmse,
distribution_mean: mean,
distribution_median: median,
distribution_mode: mode,
within_50_percent: within_50,
within_68_percent: within_68,
within_95_percent: within_95,
sample_size: n,
})
}
/// Helper method to calculate the proportion of data within a given interval.
///
/// # Arguments
///
/// * `data` - A slice of f64 values representing the dataset.
/// * `lower_percentile` - The lower bound of the interval as a percentile (0.0 to 1.0).
/// * `upper_percentile` - The upper bound of the interval as a percentile (0.0 to 1.0).
///
/// # Returns
///
/// Returns the proportion of data points within the specified interval.
///
/// # Errors
///
/// Returns an `Error` if there's an issue calculating the percentiles.
fn calculate_within_interval(
&self,
data: &[f64],
lower_percentile: f64,
upper_percentile: f64,
) -> Result<f64> {
let lower_bound = self
.percentile_estimate(lower_percentile)
.map_err(|e| Error::FitQualityError(format!("Error calculating lower bound: {}", e)))?;
let upper_bound = self
.percentile_estimate(upper_percentile)
.map_err(|e| Error::FitQualityError(format!("Error calculating upper bound: {}", e)))?;
let count_within = data
.iter()
.filter(|&&x| x >= lower_bound && x <= upper_bound)
.count();
Ok(count_within as f64 / data.len() as f64)
}
}