Skip to main content

scirs2_stats/distributions/
pareto.rs

1//! Pareto distribution functions
2//!
3//! This module provides functionality for the Pareto distribution.
4
5use crate::error::{StatsError, StatsResult};
6use crate::sampling::SampleableDistribution;
7use scirs2_core::numeric::{Float, NumCast};
8use scirs2_core::random::prelude::*;
9use scirs2_core::random::{Distribution, Uniform as RandUniform};
10
11/// Pareto distribution structure
12///
13/// The Pareto distribution is a power-law probability distribution that is used to
14/// model many types of observable phenomena, particularly those exhibiting the
15/// "80-20 rule" (Pareto principle).
16pub struct Pareto<F: Float> {
17    /// Shape parameter (alpha > 0)
18    pub shape: F,
19    /// Scale parameter (x_m > 0)
20    pub scale: F,
21    /// Location parameter (default: 0)
22    pub loc: F,
23    /// Random number generator for uniform distribution
24    rand_distr: RandUniform<f64>,
25}
26
27impl<F: Float + NumCast + std::fmt::Display> Pareto<F> {
28    /// Create a new Pareto distribution with given parameters
29    ///
30    /// # Arguments
31    ///
32    /// * `shape` - Shape parameter (alpha > 0)
33    /// * `scale` - Scale parameter (x_m > 0)
34    /// * `loc` - Location parameter (default: 0)
35    ///
36    /// # Returns
37    ///
38    /// * A new Pareto distribution instance
39    ///
40    /// # Examples
41    ///
42    /// ```
43    /// use scirs2_stats::distributions::pareto::Pareto;
44    ///
45    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
46    /// ```
47    pub fn new(shape: F, scale: F, loc: F) -> StatsResult<Self> {
48        // Validate parameters
49        if shape <= F::zero() {
50            return Err(StatsError::DomainError(
51                "Shape parameter must be positive".to_string(),
52            ));
53        }
54
55        if scale <= F::zero() {
56            return Err(StatsError::DomainError(
57                "Scale parameter must be positive".to_string(),
58            ));
59        }
60
61        // Create RNG for uniform distribution in [0, 1)
62        let rand_distr = match RandUniform::new(0.0, 1.0) {
63            Ok(distr) => distr,
64            Err(_) => {
65                return Err(StatsError::ComputationError(
66                    "Failed to create uniform distribution for sampling".to_string(),
67                ))
68            }
69        };
70
71        Ok(Pareto {
72            shape,
73            scale,
74            loc,
75            rand_distr,
76        })
77    }
78
79    /// Calculate the probability density function (PDF) at a given point
80    ///
81    /// # Arguments
82    ///
83    /// * `x` - The point at which to evaluate the PDF
84    ///
85    /// # Returns
86    ///
87    /// * The value of the PDF at the given point
88    ///
89    /// # Examples
90    ///
91    /// ```
92    /// use scirs2_stats::distributions::pareto::Pareto;
93    ///
94    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
95    /// let pdf_at_two = pareto.pdf(2.0);
96    /// assert!((pdf_at_two - 0.1875).abs() < 1e-7);
97    /// ```
98    pub fn pdf(&self, x: F) -> F {
99        // Adjust x by location parameter
100        let x_adjusted = x - self.loc;
101
102        // For x < scale + loc, PDF is 0
103        if x_adjusted < self.scale {
104            return F::zero();
105        }
106
107        // PDF = (shape / scale) * (scale / (x - loc))^(shape + 1)
108        let ratio = self.scale / x_adjusted;
109        let shape_plus_one = self.shape + F::one();
110        self.shape / self.scale * ratio.powf(shape_plus_one)
111    }
112
113    /// Calculate the cumulative distribution function (CDF) at a given point
114    ///
115    /// # Arguments
116    ///
117    /// * `x` - The point at which to evaluate the CDF
118    ///
119    /// # Returns
120    ///
121    /// * The value of the CDF at the given point
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// use scirs2_stats::distributions::pareto::Pareto;
127    ///
128    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
129    /// let cdf_at_two = pareto.cdf(2.0);
130    /// assert!((cdf_at_two - 0.875).abs() < 1e-7);
131    /// ```
132    pub fn cdf(&self, x: F) -> F {
133        // Adjust x by location parameter
134        let x_adjusted = x - self.loc;
135
136        // For x <= scale + loc, CDF is 0
137        if x_adjusted <= self.scale {
138            return F::zero();
139        }
140
141        // CDF = 1 - (scale / (x - loc))^shape
142        let ratio = self.scale / x_adjusted;
143        F::one() - ratio.powf(self.shape)
144    }
145
146    /// Inverse of the cumulative distribution function (quantile function)
147    ///
148    /// # Arguments
149    ///
150    /// * `p` - Probability value (between 0 and 1)
151    ///
152    /// # Returns
153    ///
154    /// * The value x such that CDF(x) = p
155    ///
156    /// # Examples
157    ///
158    /// ```
159    /// use scirs2_stats::distributions::pareto::Pareto;
160    ///
161    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
162    /// let x = pareto.ppf(0.5).expect("Operation failed");
163    /// assert!((x - 1.2599210).abs() < 1e-6);
164    /// ```
165    pub fn ppf(&self, p: F) -> StatsResult<F> {
166        if p < F::zero() || p > F::one() {
167            return Err(StatsError::DomainError(
168                "Probability must be between 0 and 1".to_string(),
169            ));
170        }
171
172        // Special cases
173        if p == F::zero() {
174            return Ok(self.scale + self.loc);
175        }
176        if p == F::one() {
177            return Ok(F::infinity());
178        }
179
180        // Compute the quantile directly from the formula
181        // x = scale / (1 - p)^(1/shape) + loc
182        let one = F::one();
183        let one_minus_p = one - p;
184        let pow = one_minus_p.powf(one / self.shape);
185        let quantile = self.scale / pow + self.loc;
186
187        Ok(quantile)
188    }
189
190    /// Generate random samples from the distribution
191    ///
192    /// # Arguments
193    ///
194    /// * `size` - Number of samples to generate
195    ///
196    /// # Returns
197    ///
198    /// * Vector of random samples
199    ///
200    /// # Examples
201    ///
202    /// ```
203    /// use scirs2_stats::distributions::pareto::Pareto;
204    ///
205    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
206    /// let samples = pareto.rvs(10).expect("Operation failed");
207    /// assert_eq!(samples.len(), 10);
208    /// ```
209    pub fn rvs(&self, size: usize) -> StatsResult<Vec<F>> {
210        let mut rng = thread_rng();
211        let mut samples = Vec::with_capacity(size);
212
213        // Generate samples using the inverse transform sampling method
214        for _ in 0..size {
215            // Generate uniform random number in [0, 1)
216            let u = self.rand_distr.sample(&mut rng);
217            let u_f = F::from(u).expect("Failed to convert to float");
218
219            // Apply inverse CDF transform
220            let sample = self.ppf(u_f)?;
221            samples.push(sample);
222        }
223
224        Ok(samples)
225    }
226
227    /// Calculate the mean of the distribution
228    ///
229    /// # Returns
230    ///
231    /// * The mean of the distribution
232    ///
233    /// # Examples
234    ///
235    /// ```
236    /// use scirs2_stats::distributions::pareto::Pareto;
237    ///
238    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
239    /// let mean = pareto.mean();
240    /// assert!((mean - 1.5).abs() < 1e-7);
241    /// ```
242    pub fn mean(&self) -> F {
243        // Mean is only defined for shape > 1
244        if self.shape <= F::one() {
245            return F::infinity();
246        }
247
248        // Mean = (shape * scale) / (shape - 1) + loc
249        let shape_minus_one = self.shape - F::one();
250        (self.shape * self.scale) / shape_minus_one + self.loc
251    }
252
253    /// Calculate the variance of the distribution
254    ///
255    /// # Returns
256    ///
257    /// * The variance of the distribution
258    ///
259    /// # Examples
260    ///
261    /// ```
262    /// use scirs2_stats::distributions::pareto::Pareto;
263    ///
264    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
265    /// let var = pareto.var();
266    /// assert!((var - 0.75).abs() < 1e-7);
267    /// ```
268    pub fn var(&self) -> F {
269        // Variance is only defined for shape > 2
270        if self.shape <= F::from(2.0).expect("Failed to convert constant to float") {
271            return F::infinity();
272        }
273
274        // Variance = (scale^2 * shape) / ((shape - 1)^2 * (shape - 2))
275        let one = F::one();
276        let two = F::from(2.0).expect("Failed to convert constant to float");
277        let shape_minus_one = self.shape - one;
278        let shape_minus_two = self.shape - two;
279
280        (self.scale * self.scale * self.shape)
281            / (shape_minus_one * shape_minus_one * shape_minus_two)
282    }
283
284    /// Calculate the median of the distribution
285    ///
286    /// # Returns
287    ///
288    /// * The median of the distribution
289    ///
290    /// # Examples
291    ///
292    /// ```
293    /// use scirs2_stats::distributions::pareto::Pareto;
294    ///
295    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
296    /// let median = pareto.median();
297    /// assert!((median - 1.2599210).abs() < 1e-6);
298    /// ```
299    pub fn median(&self) -> F {
300        // Median = scale * 2^(1/shape) + loc
301        let two = F::from(2.0).expect("Failed to convert constant to float");
302        let two_pow = two.powf(F::one() / self.shape);
303        self.scale * two_pow + self.loc
304    }
305
306    /// Calculate the mode of the distribution
307    ///
308    /// # Returns
309    ///
310    /// * The mode of the distribution
311    ///
312    /// # Examples
313    ///
314    /// ```
315    /// use scirs2_stats::distributions::pareto::Pareto;
316    ///
317    /// let pareto = Pareto::new(3.0f64, 1.0, 0.0).expect("Operation failed");
318    /// let mode = pareto.mode();
319    /// assert!((mode - 1.0).abs() < 1e-7);
320    /// ```
321    pub fn mode(&self) -> F {
322        // Mode = scale + loc
323        self.scale + self.loc
324    }
325}
326
327/// Create a Pareto distribution with the given parameters.
328///
329/// This is a convenience function to create a Pareto distribution with
330/// the given shape, scale, and location parameters.
331///
332/// # Arguments
333///
334/// * `shape` - Shape parameter (alpha > 0)
335/// * `scale` - Scale parameter (x_m > 0)
336/// * `loc` - Location parameter (default: 0)
337///
338/// # Returns
339///
340/// * A Pareto distribution object
341///
342/// # Examples
343///
344/// ```
345/// use scirs2_stats::distributions::pareto;
346///
347/// let p = pareto::pareto(3.0f64, 1.0, 0.0).expect("Operation failed");
348/// let pdf_at_two = p.pdf(2.0);
349/// assert!((pdf_at_two - 0.1875).abs() < 1e-7);
350/// ```
351#[allow(dead_code)]
352pub fn pareto<F>(shape: F, scale: F, loc: F) -> StatsResult<Pareto<F>>
353where
354    F: Float + NumCast + std::fmt::Display,
355{
356    Pareto::new(shape, scale, loc)
357}
358
359/// Implementation of SampleableDistribution for Pareto
360impl<F: Float + NumCast + std::fmt::Display> SampleableDistribution<F> for Pareto<F> {
361    fn rvs(&self, size: usize) -> StatsResult<Vec<F>> {
362        self.rvs(size)
363    }
364}
365
366#[cfg(test)]
367mod tests {
368    use super::*;
369    use approx::assert_relative_eq;
370
371    #[test]
372    fn test_pareto_creation() {
373        // Standard Pareto (shape=1, scale=1, loc=0)
374        let pareto1 = Pareto::new(1.0, 1.0, 0.0).expect("Operation failed");
375        assert_eq!(pareto1.shape, 1.0);
376        assert_eq!(pareto1.scale, 1.0);
377        assert_eq!(pareto1.loc, 0.0);
378
379        // Pareto with shape=3
380        let pareto3 = Pareto::new(3.0, 1.0, 0.0).expect("Operation failed");
381        assert_eq!(pareto3.shape, 3.0);
382        assert_eq!(pareto3.scale, 1.0);
383        assert_eq!(pareto3.loc, 0.0);
384
385        // Custom Pareto
386        let custom = Pareto::new(2.5, 2.0, 1.0).expect("Operation failed");
387        assert_eq!(custom.shape, 2.5);
388        assert_eq!(custom.scale, 2.0);
389        assert_eq!(custom.loc, 1.0);
390
391        // Error cases
392        assert!(Pareto::<f64>::new(0.0, 1.0, 0.0).is_err());
393        assert!(Pareto::<f64>::new(-1.0, 1.0, 0.0).is_err());
394        assert!(Pareto::<f64>::new(1.0, 0.0, 0.0).is_err());
395        assert!(Pareto::<f64>::new(1.0, -1.0, 0.0).is_err());
396    }
397
398    #[test]
399    fn test_pareto_pdf() {
400        // Pareto with shape=3, scale=1, loc=0
401        let pareto = Pareto::new(3.0, 1.0, 0.0).expect("Operation failed");
402
403        // PDF at x = 1 (boundary) should be α/scale = 3.0
404        let pdf_at_one = pareto.pdf(1.0);
405        assert_relative_eq!(pdf_at_one, 3.0, epsilon = 1e-10);
406
407        // PDF at x = 2 should be 3 * (1/2)^4 = 3 * 1/16 = 0.1875
408        let pdf_at_two = pareto.pdf(2.0);
409        assert_relative_eq!(pdf_at_two, 0.1875, epsilon = 1e-7);
410
411        // PDF at values less than scale should be 0
412        let pdf_at_half = pareto.pdf(0.5);
413        assert_eq!(pdf_at_half, 0.0);
414
415        // Custom Pareto with location
416        let custom = Pareto::new(3.0, 1.0, 1.0).expect("Operation failed");
417
418        // PDF at x = 2 (with loc=1) should equal pareto.pdf(1.0) = α/scale = 3.0
419        let pdf_at_two_loc = custom.pdf(2.0);
420        assert_relative_eq!(pdf_at_two_loc, 3.0, epsilon = 1e-10);
421
422        // PDF at x = 3 (with loc=1) should equal pareto.pdf(2.0)
423        let pdf_at_three_loc = custom.pdf(3.0);
424        assert_relative_eq!(pdf_at_three_loc, 0.1875, epsilon = 1e-7);
425    }
426
427    #[test]
428    fn test_pareto_cdf() {
429        // Pareto with shape=3, scale=1, loc=0
430        let pareto = Pareto::new(3.0, 1.0, 0.0).expect("Operation failed");
431
432        // CDF at x = 1 should be 0
433        let cdf_at_one = pareto.cdf(1.0);
434        assert_eq!(cdf_at_one, 0.0);
435
436        // CDF at x = 2 should be 1 - (1/2)^3 = 1 - 1/8 = 0.875
437        let cdf_at_two = pareto.cdf(2.0);
438        assert_relative_eq!(cdf_at_two, 0.875, epsilon = 1e-7);
439
440        // CDF at values less than scale should be 0
441        let cdf_at_half = pareto.cdf(0.5);
442        assert_eq!(cdf_at_half, 0.0);
443
444        // Custom Pareto with location
445        let custom = Pareto::new(3.0, 1.0, 1.0).expect("Operation failed");
446
447        // CDF at x = 2 (with loc=1) should equal pareto.cdf(1.0)
448        let cdf_at_two_loc = custom.cdf(2.0);
449        assert_eq!(cdf_at_two_loc, 0.0);
450
451        // CDF at x = 3 (with loc=1) should equal pareto.cdf(2.0)
452        let cdf_at_three_loc = custom.cdf(3.0);
453        assert_relative_eq!(cdf_at_three_loc, 0.875, epsilon = 1e-7);
454    }
455
456    #[test]
457    fn test_pareto_ppf() {
458        // Pareto with shape=3, scale=1, loc=0
459        let pareto = Pareto::new(3.0, 1.0, 0.0).expect("Operation failed");
460
461        // PPF at p = 0 should be scale = 1.0
462        let ppf_at_zero = pareto.ppf(0.0).expect("Operation failed");
463        assert_eq!(ppf_at_zero, 1.0);
464
465        // PPF at p = 0.5 should be scale / (1 - 0.5)^(1/shape) = 1 / 0.5^(1/3) ≈ 1.2599210
466        let ppf_at_half = pareto.ppf(0.5).expect("Operation failed");
467        assert_relative_eq!(ppf_at_half, 1.2599210, epsilon = 1e-6);
468
469        // PPF at p = 0.875 should be close to 2.0 (inverse of CDF at x = 2.0)
470        let ppf_at_875 = pareto.ppf(0.875).expect("Operation failed");
471        assert_relative_eq!(ppf_at_875, 2.0, epsilon = 1e-6);
472
473        // Custom Pareto with location
474        let custom = Pareto::new(3.0, 1.0, 1.0).expect("Operation failed");
475
476        // PPF at p = 0.5 should be pareto.ppf(0.5) + loc
477        let ppf_at_half_loc = custom.ppf(0.5).expect("Operation failed");
478        assert_relative_eq!(ppf_at_half_loc, ppf_at_half + 1.0, epsilon = 1e-6);
479
480        // Error cases
481        assert!(pareto.ppf(-0.1).is_err());
482        assert!(pareto.ppf(1.1).is_err());
483    }
484
485    #[test]
486    fn test_pareto_statistics() {
487        // Pareto with shape=3, scale=1, loc=0
488        let pareto = Pareto::new(3.0, 1.0, 0.0).expect("Operation failed");
489
490        // Mean should be (shape * scale) / (shape - 1) = 3 / 2 = 1.5
491        let mean = pareto.mean();
492        assert_relative_eq!(mean, 1.5, epsilon = 1e-7);
493
494        // Variance should be (scale^2 * shape) / ((shape - 1)^2 * (shape - 2))
495        // = 1^2 * 3 / (2^2 * 1) = 3 / 4 = 0.75
496        let var = pareto.var();
497        assert_relative_eq!(var, 0.75, epsilon = 1e-7);
498
499        // Median should be scale * 2^(1/shape) = 1 * 2^(1/3) ≈ 1.2599210
500        let median = pareto.median();
501        assert_relative_eq!(median, 1.2599210, epsilon = 1e-6);
502
503        // Mode should be scale = 1.0
504        let mode = pareto.mode();
505        assert_eq!(mode, 1.0);
506
507        // Pareto with shape=1 (mean is not defined)
508        let pareto1 = Pareto::new(1.0, 1.0, 0.0).expect("Operation failed");
509        assert!(pareto1.mean().is_infinite());
510        assert!(pareto1.var().is_infinite());
511
512        // Pareto with shape=1.5 (variance is not defined but mean is)
513        let pareto15 = Pareto::new(1.5, 1.0, 0.0).expect("Operation failed");
514        assert!(!pareto15.mean().is_infinite());
515        assert!(pareto15.var().is_infinite());
516
517        // Custom Pareto with location
518        let custom = Pareto::new(3.0, 1.0, 1.0).expect("Operation failed");
519
520        // Mean should be pareto.mean() + loc
521        let mean_loc = custom.mean();
522        assert_relative_eq!(mean_loc, mean + 1.0, epsilon = 1e-7);
523
524        // Variance should be same as pareto.var()
525        let var_loc = custom.var();
526        assert_relative_eq!(var_loc, var, epsilon = 1e-7);
527    }
528
529    #[test]
530    fn test_pareto_rvs() {
531        let pareto = Pareto::new(3.0, 1.0, 0.0).expect("Operation failed");
532
533        // Generate samples
534        let samples = pareto.rvs(1000).expect("Operation failed");
535
536        // Check the number of samples
537        assert_eq!(samples.len(), 1000);
538
539        // Basic check (all samples should be >= scale)
540        for &sample in &samples {
541            assert!(sample >= 1.0);
542        }
543
544        // Basic statistical checks
545        let sum: f64 = samples.iter().sum();
546        let mean = sum / 1000.0;
547
548        // Mean should be close to true mean (within reason for random samples)
549        // True mean for Pareto(3, 1, 0) is 1.5
550        assert!((mean - 1.5).abs() < 0.2);
551
552        // Calculate sample median as a sanity check
553        let mut sorted_samples = samples.clone();
554        sorted_samples.sort_by(|a, b| a.partial_cmp(b).expect("Operation failed"));
555        let median = if samples.len() % 2 == 0 {
556            (sorted_samples[499] + sorted_samples[500]) / 2.0
557        } else {
558            sorted_samples[500]
559        };
560
561        // Median should be close to 1.2599210 (within reason for random samples)
562        assert!((median - 1.2599210).abs() < 0.2);
563    }
564}