scirs2_stats/
effect_size.rs

1//! Effect size measures for statistical analysis
2//!
3//! Effect sizes quantify the magnitude of a phenomenon, independent of sample size.
4//! They are essential for meta-analysis, power analysis, and reporting practical
5//! significance alongside statistical significance.
6//!
7//! ## Measures Provided
8//!
9//! ### Standardized Mean Differences
10//! - **Cohen's d** - Standardized difference between two group means (pooled SD)
11//! - **Hedges' g** - Bias-corrected version of Cohen's d for small samples
12//! - **Glass's delta** - Uses only the control group's SD as denominator
13//!
14//! ### Correlation-based
15//! - **Point-biserial correlation** - Correlation between a binary and continuous variable
16//!
17//! ### Variance-explained
18//! - **Eta-squared** - Proportion of variance explained (ANOVA)
19//! - **Partial eta-squared** - Proportion of variance explained controlling for other factors
20//! - **Omega-squared** - Less biased version of eta-squared
21//!
22//! ### Confidence Intervals
23//! - **CI for Cohen's d** - Noncentral t-distribution based
24//! - **CI for correlation** - Fisher z-transformation based
25
26use crate::error::{StatsError, StatsResult};
27use scirs2_core::ndarray::ArrayView1;
28use scirs2_core::numeric::{Float, NumCast};
29
30/// Result containing an effect size estimate with optional confidence interval
31#[derive(Debug, Clone)]
32pub struct EffectSizeResult<F: Float> {
33    /// Point estimate of the effect size
34    pub estimate: F,
35    /// Name of the effect size measure
36    pub measure: String,
37    /// Lower bound of the confidence interval (if computed)
38    pub ci_lower: Option<F>,
39    /// Upper bound of the confidence interval (if computed)
40    pub ci_upper: Option<F>,
41    /// Confidence level for the interval
42    pub confidence_level: Option<F>,
43}
44
45// ========================================================================
46// Helper: standard normal quantile (inverse CDF)
47// ========================================================================
48
49/// Inverse of the standard normal CDF (quantile function) using
50/// the rational approximation of Beasley & Springer (1977) / Moro (1995).
51fn normal_quantile(p: f64) -> f64 {
52    if p <= 0.0 {
53        return f64::NEG_INFINITY;
54    }
55    if p >= 1.0 {
56        return f64::INFINITY;
57    }
58    if (p - 0.5).abs() < 1e-15 {
59        return 0.0;
60    }
61
62    // Rational approximation
63    let a = [
64        -3.969683028665376e+01,
65        2.209460984245205e+02,
66        -2.759285104469687e+02,
67        1.383577518672690e+02,
68        -3.066479806614716e+01,
69        2.506628277459239e+00,
70    ];
71    let b = [
72        -5.447609879822406e+01,
73        1.615858368580409e+02,
74        -1.556989798598866e+02,
75        6.680131188771972e+01,
76        -1.328068155288572e+01,
77    ];
78    let c = [
79        -7.784894002430293e-03,
80        -3.223964580411365e-01,
81        -2.400758277161838e+00,
82        -2.549732539343734e+00,
83        4.374664141464968e+00,
84        2.938163982698783e+00,
85    ];
86    let d = [
87        7.784695709041462e-03,
88        3.224671290700398e-01,
89        2.445134137142996e+00,
90        3.754408661907416e+00,
91    ];
92
93    let p_low = 0.02425;
94    let p_high = 1.0 - p_low;
95
96    if p < p_low {
97        // Lower tail
98        let q = (-2.0 * p.ln()).sqrt();
99        (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
100            / ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
101    } else if p <= p_high {
102        // Central region
103        let q = p - 0.5;
104        let r = q * q;
105        (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q
106            / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1.0)
107    } else {
108        // Upper tail
109        let q = (-2.0 * (1.0 - p).ln()).sqrt();
110        -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5])
111            / ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1.0)
112    }
113}
114
115/// Standard normal CDF (Abramowitz & Stegun approximation)
116fn normal_cdf_f64(x: f64) -> f64 {
117    if x < -8.0 {
118        return 0.0;
119    }
120    if x > 8.0 {
121        return 1.0;
122    }
123    let abs_x = x.abs();
124    let t = 1.0 / (1.0 + 0.2316419 * abs_x);
125    let d = 0.39894228040143268 * (-0.5 * x * x).exp();
126    let p = t
127        * (0.319381530
128            + t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
129    if x >= 0.0 {
130        1.0 - d * p
131    } else {
132        d * p
133    }
134}
135
136// ========================================================================
137// Cohen's d
138// ========================================================================
139
140/// Computes Cohen's d, the standardized mean difference between two groups.
141///
142/// d = (mean_x - mean_y) / pooled_sd
143///
144/// where pooled_sd = sqrt(((n1-1)*s1^2 + (n2-1)*s2^2) / (n1+n2-2))
145///
146/// # Arguments
147///
148/// * `x` - First sample
149/// * `y` - Second sample
150///
151/// # Returns
152///
153/// An `EffectSizeResult` with Cohen's d.
154///
155/// # Interpretation
156///
157/// |d| < 0.2 = small, |d| ~ 0.5 = medium, |d| > 0.8 = large (Cohen, 1988)
158///
159/// # Examples
160///
161/// ```
162/// use scirs2_core::ndarray::array;
163/// use scirs2_stats::effect_size::cohens_d;
164///
165/// let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
166/// let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
167///
168/// let result = cohens_d(&x.view(), &y.view()).expect("Cohen's d failed");
169/// assert!(result.estimate > 0.0); // x has larger mean
170/// ```
171pub fn cohens_d<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<EffectSizeResult<F>>
172where
173    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
174{
175    if x.len() < 2 || y.len() < 2 {
176        return Err(StatsError::InvalidArgument(
177            "Both samples must have at least 2 observations for Cohen's d".to_string(),
178        ));
179    }
180
181    let (mean_x, var_x, n1) = sample_stats(x)?;
182    let (mean_y, var_y, n2) = sample_stats(y)?;
183
184    let pooled_var = ((n1 - 1.0) * var_x + (n2 - 1.0) * var_y) / (n1 + n2 - 2.0);
185    let pooled_sd = pooled_var.sqrt();
186
187    if pooled_sd < 1e-30 {
188        return Err(StatsError::ComputationError(
189            "Pooled standard deviation is zero; Cohen's d is undefined".to_string(),
190        ));
191    }
192
193    let d = (mean_x - mean_y) / pooled_sd;
194
195    Ok(EffectSizeResult {
196        estimate: F::from(d).unwrap_or(F::zero()),
197        measure: "Cohen's d".to_string(),
198        ci_lower: None,
199        ci_upper: None,
200        confidence_level: None,
201    })
202}
203
204// ========================================================================
205// Hedges' g (bias-corrected Cohen's d)
206// ========================================================================
207
208/// Computes Hedges' g, a bias-corrected version of Cohen's d.
209///
210/// g = d * J, where J = 1 - 3/(4*(n1+n2-2) - 1) is the correction factor.
211///
212/// For small samples (n < 20), Cohen's d has a slight upward bias.
213/// Hedges' g corrects for this.
214///
215/// # Arguments
216///
217/// * `x` - First sample
218/// * `y` - Second sample
219///
220/// # Returns
221///
222/// An `EffectSizeResult` with Hedges' g.
223pub fn hedges_g<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> StatsResult<EffectSizeResult<F>>
224where
225    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
226{
227    if x.len() < 2 || y.len() < 2 {
228        return Err(StatsError::InvalidArgument(
229            "Both samples must have at least 2 observations for Hedges' g".to_string(),
230        ));
231    }
232
233    let d_result = cohens_d(x, y)?;
234    let d = <f64 as NumCast>::from(d_result.estimate).unwrap_or(0.0);
235    let df = (x.len() + y.len() - 2) as f64;
236
237    // Hedges' correction factor J
238    let j = 1.0 - 3.0 / (4.0 * df - 1.0);
239    let g = d * j;
240
241    Ok(EffectSizeResult {
242        estimate: F::from(g).unwrap_or(F::zero()),
243        measure: "Hedges' g".to_string(),
244        ci_lower: None,
245        ci_upper: None,
246        confidence_level: None,
247    })
248}
249
250// ========================================================================
251// Glass's delta
252// ========================================================================
253
254/// Computes Glass's delta, using the control group's standard deviation.
255///
256/// delta = (mean_treatment - mean_control) / sd_control
257///
258/// This is appropriate when the variances of the two groups are not expected
259/// to be equal, and there is a clear control group.
260///
261/// # Arguments
262///
263/// * `treatment` - Treatment group sample
264/// * `control` - Control group sample (its SD is used as the denominator)
265///
266/// # Returns
267///
268/// An `EffectSizeResult` with Glass's delta.
269pub fn glass_delta<F>(
270    treatment: &ArrayView1<F>,
271    control: &ArrayView1<F>,
272) -> StatsResult<EffectSizeResult<F>>
273where
274    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
275{
276    if treatment.len() < 2 || control.len() < 2 {
277        return Err(StatsError::InvalidArgument(
278            "Both samples must have at least 2 observations for Glass's delta".to_string(),
279        ));
280    }
281
282    let (mean_t, _, _) = sample_stats(treatment)?;
283    let (mean_c, var_c, _) = sample_stats(control)?;
284    let sd_c = var_c.sqrt();
285
286    if sd_c < 1e-30 {
287        return Err(StatsError::ComputationError(
288            "Control group standard deviation is zero; Glass's delta is undefined".to_string(),
289        ));
290    }
291
292    let delta = (mean_t - mean_c) / sd_c;
293
294    Ok(EffectSizeResult {
295        estimate: F::from(delta).unwrap_or(F::zero()),
296        measure: "Glass's delta".to_string(),
297        ci_lower: None,
298        ci_upper: None,
299        confidence_level: None,
300    })
301}
302
303// ========================================================================
304// Point-biserial correlation
305// ========================================================================
306
307/// Computes the point-biserial correlation between a binary variable and a continuous variable.
308///
309/// r_pb = (M1 - M0) / Sp * sqrt(n0 * n1 / n^2)
310///
311/// where M1, M0 are means of the continuous variable for the two groups,
312/// Sp is the pooled standard deviation, and n0, n1 are the group sizes.
313///
314/// # Arguments
315///
316/// * `binary` - Binary group labels (each element should be 0 or 1)
317/// * `continuous` - Continuous variable values
318///
319/// # Returns
320///
321/// An `EffectSizeResult` with the point-biserial correlation in [-1, 1].
322pub fn point_biserial_correlation<F>(
323    binary: &ArrayView1<F>,
324    continuous: &ArrayView1<F>,
325) -> StatsResult<EffectSizeResult<F>>
326where
327    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
328{
329    if binary.len() != continuous.len() {
330        return Err(StatsError::DimensionMismatch(
331            "Binary and continuous arrays must have equal length".to_string(),
332        ));
333    }
334    if binary.len() < 3 {
335        return Err(StatsError::InvalidArgument(
336            "At least 3 observations required for point-biserial correlation".to_string(),
337        ));
338    }
339
340    let n = binary.len();
341    let nf = n as f64;
342
343    // Separate into two groups
344    let mut group0: Vec<f64> = Vec::new();
345    let mut group1: Vec<f64> = Vec::new();
346
347    for i in 0..n {
348        let b = <f64 as NumCast>::from(binary[i]).unwrap_or(0.0);
349        let c = <f64 as NumCast>::from(continuous[i]).unwrap_or(0.0);
350        if (b - 0.0).abs() < 1e-10 {
351            group0.push(c);
352        } else if (b - 1.0).abs() < 1e-10 {
353            group1.push(c);
354        } else {
355            return Err(StatsError::InvalidArgument(format!(
356                "Binary variable must contain only 0 and 1, got {} at index {}",
357                b, i
358            )));
359        }
360    }
361
362    if group0.is_empty() || group1.is_empty() {
363        return Err(StatsError::InvalidArgument(
364            "Both groups must have at least one observation".to_string(),
365        ));
366    }
367
368    let n0 = group0.len() as f64;
369    let n1 = group1.len() as f64;
370    let mean0: f64 = group0.iter().sum::<f64>() / n0;
371    let mean1: f64 = group1.iter().sum::<f64>() / n1;
372
373    // Overall standard deviation (sample)
374    let overall_mean: f64 = continuous
375        .iter()
376        .map(|&v| <f64 as NumCast>::from(v).unwrap_or(0.0))
377        .sum::<f64>()
378        / nf;
379    let overall_var: f64 = continuous
380        .iter()
381        .map(|&v| {
382            let vf = <f64 as NumCast>::from(v).unwrap_or(0.0);
383            (vf - overall_mean) * (vf - overall_mean)
384        })
385        .sum::<f64>()
386        / (nf - 1.0);
387    let overall_sd = overall_var.sqrt();
388
389    if overall_sd < 1e-30 {
390        return Err(StatsError::ComputationError(
391            "Standard deviation is zero; point-biserial correlation is undefined".to_string(),
392        ));
393    }
394
395    let rpb = (mean1 - mean0) / overall_sd * (n0 * n1).sqrt() / nf;
396
397    Ok(EffectSizeResult {
398        estimate: F::from(rpb).unwrap_or(F::zero()),
399        measure: "Point-biserial correlation".to_string(),
400        ci_lower: None,
401        ci_upper: None,
402        confidence_level: None,
403    })
404}
405
406// ========================================================================
407// Eta-squared
408// ========================================================================
409
410/// Computes eta-squared from ANOVA-style sums of squares.
411///
412/// eta^2 = SS_between / SS_total
413///
414/// This is the proportion of total variance explained by the grouping factor.
415///
416/// # Arguments
417///
418/// * `groups` - Slice of array views, one per group
419///
420/// # Returns
421///
422/// An `EffectSizeResult` with eta-squared in [0, 1].
423///
424/// # Interpretation
425///
426/// 0.01 = small, 0.06 = medium, 0.14 = large (Cohen, 1988)
427pub fn eta_squared<F>(groups: &[ArrayView1<F>]) -> StatsResult<EffectSizeResult<F>>
428where
429    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
430{
431    if groups.len() < 2 {
432        return Err(StatsError::InvalidArgument(
433            "At least 2 groups required for eta-squared".to_string(),
434        ));
435    }
436    for (i, g) in groups.iter().enumerate() {
437        if g.is_empty() {
438            return Err(StatsError::InvalidArgument(format!("Group {} is empty", i)));
439        }
440    }
441
442    let (ss_between, ss_total) = compute_ss(groups)?;
443
444    if ss_total < 1e-30 {
445        return Err(StatsError::ComputationError(
446            "Total sum of squares is zero; eta-squared is undefined".to_string(),
447        ));
448    }
449
450    let eta2 = ss_between / ss_total;
451
452    Ok(EffectSizeResult {
453        estimate: F::from(eta2).unwrap_or(F::zero()),
454        measure: "Eta-squared".to_string(),
455        ci_lower: None,
456        ci_upper: None,
457        confidence_level: None,
458    })
459}
460
461// ========================================================================
462// Partial eta-squared
463// ========================================================================
464
465/// Computes partial eta-squared.
466///
467/// partial_eta^2 = SS_effect / (SS_effect + SS_error)
468///
469/// This is the proportion of variance attributable to the factor,
470/// after removing variance attributable to other factors.
471///
472/// # Arguments
473///
474/// * `ss_effect` - Sum of squares for the effect of interest
475/// * `ss_error` - Sum of squares for the error (residual)
476///
477/// # Returns
478///
479/// An `EffectSizeResult` with partial eta-squared in [0, 1].
480pub fn partial_eta_squared<F>(ss_effect: F, ss_error: F) -> StatsResult<EffectSizeResult<F>>
481where
482    F: Float + NumCast + std::fmt::Display,
483{
484    let ef = <f64 as NumCast>::from(ss_effect).unwrap_or(0.0);
485    let er = <f64 as NumCast>::from(ss_error).unwrap_or(0.0);
486
487    if ef < 0.0 || er < 0.0 {
488        return Err(StatsError::InvalidArgument(
489            "Sums of squares must be non-negative".to_string(),
490        ));
491    }
492    let denom = ef + er;
493    if denom < 1e-30 {
494        return Err(StatsError::ComputationError(
495            "SS_effect + SS_error is zero; partial eta-squared is undefined".to_string(),
496        ));
497    }
498
499    let partial_eta2 = ef / denom;
500
501    Ok(EffectSizeResult {
502        estimate: F::from(partial_eta2).unwrap_or(F::zero()),
503        measure: "Partial eta-squared".to_string(),
504        ci_lower: None,
505        ci_upper: None,
506        confidence_level: None,
507    })
508}
509
510// ========================================================================
511// Omega-squared
512// ========================================================================
513
514/// Computes omega-squared, a less biased alternative to eta-squared.
515///
516/// omega^2 = (SS_between - df_between * MS_error) / (SS_total + MS_error)
517///
518/// where MS_error = SS_within / df_within.
519///
520/// # Arguments
521///
522/// * `groups` - Slice of array views, one per group
523///
524/// # Returns
525///
526/// An `EffectSizeResult` with omega-squared.
527pub fn omega_squared<F>(groups: &[ArrayView1<F>]) -> StatsResult<EffectSizeResult<F>>
528where
529    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
530{
531    if groups.len() < 2 {
532        return Err(StatsError::InvalidArgument(
533            "At least 2 groups required for omega-squared".to_string(),
534        ));
535    }
536    for (i, g) in groups.iter().enumerate() {
537        if g.is_empty() {
538            return Err(StatsError::InvalidArgument(format!("Group {} is empty", i)));
539        }
540    }
541
542    let k = groups.len();
543    let n_total: usize = groups.iter().map(|g| g.len()).sum();
544
545    let (ss_between, ss_total) = compute_ss(groups)?;
546    let ss_within = ss_total - ss_between;
547
548    let df_between = (k - 1) as f64;
549    let df_within = (n_total - k) as f64;
550
551    if df_within < 1.0 {
552        return Err(StatsError::InvalidArgument(
553            "Insufficient degrees of freedom for omega-squared".to_string(),
554        ));
555    }
556
557    let ms_error = ss_within / df_within;
558    let denom = ss_total + ms_error;
559
560    if denom < 1e-30 {
561        return Err(StatsError::ComputationError(
562            "Denominator is zero; omega-squared is undefined".to_string(),
563        ));
564    }
565
566    let omega2 = (ss_between - df_between * ms_error) / denom;
567    // omega-squared can be negative (set to 0 if so)
568    let omega2 = omega2.max(0.0);
569
570    Ok(EffectSizeResult {
571        estimate: F::from(omega2).unwrap_or(F::zero()),
572        measure: "Omega-squared".to_string(),
573        ci_lower: None,
574        ci_upper: None,
575        confidence_level: None,
576    })
577}
578
579// ========================================================================
580// Confidence interval for Cohen's d
581// ========================================================================
582
583/// Computes Cohen's d with a confidence interval.
584///
585/// The CI is based on the noncentral t-distribution approximation:
586/// SE(d) ~ sqrt(1/n1 + 1/n2 + d^2/(2*(n1+n2)))
587///
588/// # Arguments
589///
590/// * `x` - First sample
591/// * `y` - Second sample
592/// * `confidence_level` - Confidence level (e.g., 0.95 for 95% CI)
593///
594/// # Returns
595///
596/// An `EffectSizeResult` with Cohen's d and confidence interval bounds.
597pub fn cohens_d_ci<F>(
598    x: &ArrayView1<F>,
599    y: &ArrayView1<F>,
600    confidence_level: F,
601) -> StatsResult<EffectSizeResult<F>>
602where
603    F: Float + std::iter::Sum<F> + NumCast + std::fmt::Display,
604{
605    let cl = <f64 as NumCast>::from(confidence_level).unwrap_or(0.95);
606    if cl <= 0.0 || cl >= 1.0 {
607        return Err(StatsError::InvalidArgument(format!(
608            "confidence_level must be in (0, 1), got {}",
609            cl
610        )));
611    }
612
613    let d_result = cohens_d(x, y)?;
614    let d = <f64 as NumCast>::from(d_result.estimate).unwrap_or(0.0);
615    let n1 = x.len() as f64;
616    let n2 = y.len() as f64;
617
618    // Approximate SE of d (Hedges & Olkin, 1985)
619    let se = (1.0 / n1 + 1.0 / n2 + d * d / (2.0 * (n1 + n2))).sqrt();
620
621    let alpha = 1.0 - cl;
622    let z = normal_quantile(1.0 - alpha / 2.0);
623
624    let lower = d - z * se;
625    let upper = d + z * se;
626
627    Ok(EffectSizeResult {
628        estimate: F::from(d).unwrap_or(F::zero()),
629        measure: "Cohen's d".to_string(),
630        ci_lower: Some(F::from(lower).unwrap_or(F::zero())),
631        ci_upper: Some(F::from(upper).unwrap_or(F::zero())),
632        confidence_level: Some(confidence_level),
633    })
634}
635
636// ========================================================================
637// Confidence interval for correlation
638// ========================================================================
639
640/// Computes a confidence interval for a Pearson correlation coefficient
641/// using Fisher's z-transformation.
642///
643/// z = atanh(r), SE(z) = 1/sqrt(n-3), CI on z, then transform back.
644///
645/// # Arguments
646///
647/// * `r` - The sample correlation coefficient
648/// * `n` - The sample size
649/// * `confidence_level` - Confidence level (e.g., 0.95)
650///
651/// # Returns
652///
653/// An `EffectSizeResult` with the correlation and CI bounds.
654pub fn correlation_ci<F>(r: F, n: usize, confidence_level: F) -> StatsResult<EffectSizeResult<F>>
655where
656    F: Float + NumCast + std::fmt::Display,
657{
658    let rf = <f64 as NumCast>::from(r).unwrap_or(0.0);
659    let cl = <f64 as NumCast>::from(confidence_level).unwrap_or(0.95);
660
661    if rf < -1.0 || rf > 1.0 {
662        return Err(StatsError::InvalidArgument(format!(
663            "Correlation must be in [-1, 1], got {}",
664            rf
665        )));
666    }
667    if n < 4 {
668        return Err(StatsError::InvalidArgument(
669            "Sample size must be at least 4 for correlation CI".to_string(),
670        ));
671    }
672    if cl <= 0.0 || cl >= 1.0 {
673        return Err(StatsError::InvalidArgument(format!(
674            "confidence_level must be in (0, 1), got {}",
675            cl
676        )));
677    }
678
679    // Fisher z-transformation
680    let z = 0.5 * ((1.0 + rf) / (1.0 - rf)).ln();
681    let se_z = 1.0 / ((n as f64 - 3.0).sqrt());
682
683    let alpha = 1.0 - cl;
684    let z_crit = normal_quantile(1.0 - alpha / 2.0);
685
686    let z_lower = z - z_crit * se_z;
687    let z_upper = z + z_crit * se_z;
688
689    // Back-transform
690    let r_lower = z_lower.tanh();
691    let r_upper = z_upper.tanh();
692
693    Ok(EffectSizeResult {
694        estimate: r,
695        measure: "Pearson correlation".to_string(),
696        ci_lower: Some(F::from(r_lower).unwrap_or(F::zero())),
697        ci_upper: Some(F::from(r_upper).unwrap_or(F::zero())),
698        confidence_level: Some(confidence_level),
699    })
700}
701
702// ========================================================================
703// Conversion between effect size measures
704// ========================================================================
705
706/// Converts Cohen's d to the correlation coefficient r.
707///
708/// r = d / sqrt(d^2 + 4)  (when n1 = n2)
709///
710/// More generally: r = d / sqrt(d^2 + (n1+n2)^2 / (n1*n2))
711///
712/// # Arguments
713///
714/// * `d` - Cohen's d value
715/// * `n1` - Size of first group
716/// * `n2` - Size of second group
717///
718/// # Returns
719///
720/// The equivalent Pearson correlation coefficient.
721pub fn d_to_r(d: f64, n1: usize, n2: usize) -> f64 {
722    let n1f = n1 as f64;
723    let n2f = n2 as f64;
724    let a = (n1f + n2f) * (n1f + n2f) / (n1f * n2f);
725    d / (d * d + a).sqrt()
726}
727
728/// Converts a Pearson correlation r to Cohen's d.
729///
730/// d = 2*r / sqrt(1 - r^2)  (when n1 = n2)
731///
732/// # Arguments
733///
734/// * `r` - Pearson correlation
735///
736/// # Returns
737///
738/// The equivalent Cohen's d.
739pub fn r_to_d(r: f64) -> f64 {
740    if (1.0 - r * r) < 1e-15 {
741        if r > 0.0 {
742            f64::INFINITY
743        } else {
744            f64::NEG_INFINITY
745        }
746    } else {
747        2.0 * r / (1.0 - r * r).sqrt()
748    }
749}
750
751// ========================================================================
752// Internal helpers
753// ========================================================================
754
755/// Compute sample mean, sample variance (ddof=1), and n as f64.
756fn sample_stats<F>(x: &ArrayView1<F>) -> StatsResult<(f64, f64, f64)>
757where
758    F: Float + std::iter::Sum<F> + NumCast,
759{
760    let n = x.len();
761    if n == 0 {
762        return Err(StatsError::InvalidArgument(
763            "Input array cannot be empty".to_string(),
764        ));
765    }
766    let nf = n as f64;
767    let mean: f64 = x
768        .iter()
769        .map(|&v| <f64 as NumCast>::from(v).unwrap_or(0.0))
770        .sum::<f64>()
771        / nf;
772    let var: f64 = x
773        .iter()
774        .map(|&v| {
775            let vf = <f64 as NumCast>::from(v).unwrap_or(0.0);
776            (vf - mean) * (vf - mean)
777        })
778        .sum::<f64>()
779        / (nf - 1.0).max(1.0);
780    Ok((mean, var, nf))
781}
782
783/// Compute SS_between and SS_total for ANOVA-style calculations.
784fn compute_ss<F>(groups: &[ArrayView1<F>]) -> StatsResult<(f64, f64)>
785where
786    F: Float + std::iter::Sum<F> + NumCast,
787{
788    // Grand mean
789    let mut grand_sum = 0.0_f64;
790    let mut n_total = 0_usize;
791    for g in groups {
792        for &v in g.iter() {
793            grand_sum += <f64 as NumCast>::from(v).unwrap_or(0.0);
794            n_total += 1;
795        }
796    }
797
798    if n_total == 0 {
799        return Err(StatsError::InvalidArgument(
800            "All groups are empty".to_string(),
801        ));
802    }
803
804    let grand_mean = grand_sum / n_total as f64;
805
806    // SS_total = sum of (x_ij - grand_mean)^2
807    let mut ss_total = 0.0_f64;
808    for g in groups {
809        for &v in g.iter() {
810            let vf = <f64 as NumCast>::from(v).unwrap_or(0.0);
811            ss_total += (vf - grand_mean) * (vf - grand_mean);
812        }
813    }
814
815    // SS_between = sum of n_i * (mean_i - grand_mean)^2
816    let mut ss_between = 0.0_f64;
817    for g in groups {
818        let ni = g.len() as f64;
819        let group_mean: f64 = g
820            .iter()
821            .map(|&v| <f64 as NumCast>::from(v).unwrap_or(0.0))
822            .sum::<f64>()
823            / ni;
824        ss_between += ni * (group_mean - grand_mean) * (group_mean - grand_mean);
825    }
826
827    Ok((ss_between, ss_total))
828}
829
830// ========================================================================
831// Tests
832// ========================================================================
833
834#[cfg(test)]
835mod tests {
836    use super::*;
837    use scirs2_core::ndarray::array;
838
839    #[test]
840    fn test_cohens_d_basic() {
841        let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
842        let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
843
844        let result = cohens_d(&x.view(), &y.view()).expect("Cohen's d failed");
845        // Mean diff = 2.0, pooled_sd = sqrt(2.5) ~ 1.58, d ~ 1.26
846        assert!(result.estimate > 1.0);
847        assert!(result.estimate < 2.0);
848    }
849
850    #[test]
851    fn test_cohens_d_equal_means() {
852        let x = array![1.0, 2.0, 3.0, 4.0, 5.0];
853        let y = array![1.0, 2.0, 3.0, 4.0, 5.0];
854
855        let result = cohens_d(&x.view(), &y.view()).expect("Cohen's d failed");
856        assert!(result.estimate.abs() < 1e-10);
857    }
858
859    #[test]
860    fn test_cohens_d_too_small() {
861        let x = array![1.0];
862        let y = array![2.0, 3.0];
863        assert!(cohens_d(&x.view(), &y.view()).is_err());
864    }
865
866    #[test]
867    fn test_hedges_g_correction() {
868        let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
869        let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
870
871        let d = cohens_d(&x.view(), &y.view()).expect("d failed");
872        let g = hedges_g(&x.view(), &y.view()).expect("g failed");
873
874        // Hedges' g should be slightly smaller than Cohen's d (bias correction)
875        assert!(g.estimate.abs() < d.estimate.abs() + 1e-10);
876        assert!(g.estimate.abs() > 0.0);
877    }
878
879    #[test]
880    fn test_glass_delta_basic() {
881        let treatment = array![6.0, 7.0, 8.0, 9.0, 10.0];
882        let control = array![1.0, 2.0, 3.0, 4.0, 5.0];
883
884        let result = glass_delta(&treatment.view(), &control.view()).expect("Glass's delta failed");
885        // mean_t = 8, mean_c = 3, sd_c = sqrt(2.5) ~ 1.58, delta ~ 3.16
886        assert!(result.estimate > 2.0);
887    }
888
889    #[test]
890    fn test_glass_delta_symmetric() {
891        let a = array![1.0, 2.0, 3.0, 4.0, 5.0];
892        let b = array![3.0, 4.0, 5.0, 6.0, 7.0];
893
894        let delta_ab = glass_delta(&a.view(), &b.view()).expect("delta AB failed");
895        let delta_ba = glass_delta(&b.view(), &a.view()).expect("delta BA failed");
896
897        // Note: Glass's delta is NOT symmetric because it uses only the control group's SD
898        assert!(delta_ab.estimate < 0.0);
899        assert!(delta_ba.estimate > 0.0);
900    }
901
902    #[test]
903    fn test_point_biserial_basic() {
904        let binary = array![0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
905        let continuous = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
906
907        let result =
908            point_biserial_correlation(&binary.view(), &continuous.view()).expect("rpb failed");
909        assert!(result.estimate > 0.0);
910        assert!(result.estimate <= 1.0);
911    }
912
913    #[test]
914    fn test_point_biserial_no_correlation() {
915        let binary = array![0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0];
916        let continuous = array![5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0];
917
918        let result = point_biserial_correlation(&binary.view(), &continuous.view());
919        // All values same => SD = 0 => error
920        assert!(result.is_err());
921    }
922
923    #[test]
924    fn test_point_biserial_invalid_binary() {
925        let binary = array![0.0, 1.0, 2.0];
926        let continuous = array![1.0, 2.0, 3.0];
927        let result = point_biserial_correlation(&binary.view(), &continuous.view());
928        assert!(result.is_err());
929    }
930
931    #[test]
932    fn test_eta_squared_basic() {
933        let g1 = array![1.0, 2.0, 3.0];
934        let g2 = array![4.0, 5.0, 6.0];
935        let g3 = array![7.0, 8.0, 9.0];
936
937        let groups = vec![g1.view(), g2.view(), g3.view()];
938        let result = eta_squared(&groups).expect("eta2 failed");
939
940        assert!(result.estimate > 0.0);
941        assert!(result.estimate <= 1.0);
942        // Groups are well separated, eta2 should be high
943        assert!(result.estimate > 0.8);
944    }
945
946    #[test]
947    fn test_eta_squared_no_effect() {
948        let g1 = array![5.0, 5.0, 5.0];
949        let g2 = array![5.0, 5.0, 5.0];
950
951        let groups = vec![g1.view(), g2.view()];
952        let result = eta_squared(&groups);
953        // All values identical => SS_total = 0 => error
954        assert!(result.is_err());
955    }
956
957    #[test]
958    fn test_partial_eta_squared() {
959        let result = partial_eta_squared(10.0_f64, 40.0_f64).expect("partial eta2 failed");
960        assert!((result.estimate - 0.2).abs() < 1e-10); // 10/(10+40) = 0.2
961    }
962
963    #[test]
964    fn test_partial_eta_squared_zero() {
965        let result = partial_eta_squared(0.0_f64, 0.0_f64);
966        assert!(result.is_err());
967    }
968
969    #[test]
970    fn test_omega_squared_basic() {
971        let g1 = array![1.0, 2.0, 3.0, 4.0, 5.0];
972        let g2 = array![6.0, 7.0, 8.0, 9.0, 10.0];
973
974        let groups = vec![g1.view(), g2.view()];
975        let result = omega_squared(&groups).expect("omega2 failed");
976
977        assert!(result.estimate > 0.0);
978        assert!(result.estimate <= 1.0);
979        // Should be slightly less than eta-squared
980        let eta2_result = eta_squared(&groups).expect("eta2 failed");
981        assert!(result.estimate <= eta2_result.estimate + 1e-10);
982    }
983
984    #[test]
985    fn test_cohens_d_ci() {
986        let x = array![5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0];
987        let y = array![3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
988
989        let result = cohens_d_ci(&x.view(), &y.view(), 0.95).expect("CI failed");
990
991        assert!(result.ci_lower.is_some());
992        assert!(result.ci_upper.is_some());
993
994        let lower = result.ci_lower.expect("no lower bound");
995        let upper = result.ci_upper.expect("no upper bound");
996        assert!(lower < result.estimate);
997        assert!(upper > result.estimate);
998        assert!((result.confidence_level.expect("no CL") - 0.95).abs() < 1e-10);
999    }
1000
1001    #[test]
1002    fn test_correlation_ci() {
1003        let result = correlation_ci(0.5_f64, 100, 0.95).expect("corr CI failed");
1004        let lower = result.ci_lower.expect("no lower");
1005        let upper = result.ci_upper.expect("no upper");
1006
1007        assert!(lower < 0.5);
1008        assert!(upper > 0.5);
1009        assert!(lower > 0.0); // r=0.5 with n=100 should have positive lower bound
1010    }
1011
1012    #[test]
1013    fn test_correlation_ci_invalid() {
1014        assert!(correlation_ci(1.5_f64, 100, 0.95).is_err());
1015        assert!(correlation_ci(0.5_f64, 2, 0.95).is_err());
1016        assert!(correlation_ci(0.5_f64, 100, 0.0).is_err());
1017    }
1018
1019    #[test]
1020    fn test_d_to_r_and_back() {
1021        let d = 0.8;
1022        let r = d_to_r(d, 50, 50);
1023        assert!(r > 0.0 && r < 1.0);
1024        // Approximate round-trip
1025        let d_back = r_to_d(r);
1026        assert!((d_back - d).abs() < 0.1); // approximate due to different formulas
1027    }
1028
1029    #[test]
1030    fn test_r_to_d_extremes() {
1031        assert!(r_to_d(0.0).abs() < 1e-10);
1032        assert!(r_to_d(0.999) > 10.0);
1033        assert!(r_to_d(-0.999) < -10.0);
1034    }
1035
1036    #[test]
1037    fn test_normal_quantile() {
1038        assert!((normal_quantile(0.5) - 0.0).abs() < 1e-10);
1039        assert!((normal_quantile(0.975) - 1.96).abs() < 0.01);
1040        assert!((normal_quantile(0.025) + 1.96).abs() < 0.01);
1041    }
1042
1043    #[test]
1044    fn test_effect_size_result_fields() {
1045        let x = array![5.0, 6.0, 7.0, 8.0, 9.0];
1046        let y = array![3.0, 4.0, 5.0, 6.0, 7.0];
1047        let result = cohens_d(&x.view(), &y.view()).expect("d failed");
1048        assert_eq!(result.measure, "Cohen's d");
1049        assert!(result.ci_lower.is_none());
1050        assert!(result.ci_upper.is_none());
1051    }
1052}
scirs2_stats/effect_size.rs

scirs2_stats/
effect_size.rs