Skip to main content

cyanea_stats/
multivariate.rs

1//! Multivariate statistical tests for community ecology.
2//!
3//! - **PERMANOVA** — permutational multivariate analysis of variance
4//! - **ANOSIM** — analysis of similarities
5//! - **Mantel test** — correlation between distance matrices
6//! - **AMOVA** — analysis of molecular variance
7//! - **BIOENV** — best subset of environmental variables explaining community structure
8
9use cyanea_core::{CyaneaError, Result};
10
11use crate::correlation;
12use crate::rank::{rank, RankMethod};
13
14// ── PERMANOVA ───────────────────────────────────────────────────────────────
15
16/// Result of PERMANOVA.
17#[derive(Debug, Clone)]
18pub struct PermanovaResult {
19    /// Pseudo-F statistic.
20    pub f_statistic: f64,
21    /// Permutation p-value.
22    pub p_value: f64,
23    /// Number of permutations performed.
24    pub n_permutations: usize,
25    /// R² (proportion of variance explained by grouping).
26    pub r_squared: f64,
27    /// Number of groups.
28    pub n_groups: usize,
29    /// Method name.
30    pub method: String,
31}
32
33/// PERMANOVA: test whether groups differ in multivariate dispersion.
34///
35/// Uses squared distances to partition total sum of squares into among-group
36/// and within-group components, then tests significance by permuting group labels.
37///
38/// # Arguments
39///
40/// * `distances` — symmetric pairwise distance matrix
41/// * `groups` — group label for each sample (0-indexed integers)
42/// * `n_permutations` — number of permutations for p-value estimation
43/// * `seed` — random seed for reproducibility
44///
45/// # Errors
46///
47/// Returns an error if the distance matrix is not square, groups don't match,
48/// or fewer than 2 groups exist.
49pub fn permanova(
50    distances: &[Vec<f64>],
51    groups: &[usize],
52    n_permutations: usize,
53    seed: u64,
54) -> Result<PermanovaResult> {
55    let n = distances.len();
56    validate_distance_matrix(distances, n)?;
57    if groups.len() != n {
58        return Err(CyaneaError::InvalidInput(format!(
59            "permanova: groups length ({}) != distance matrix size ({})",
60            groups.len(),
61            n
62        )));
63    }
64    if n_permutations == 0 {
65        return Err(CyaneaError::InvalidInput(
66            "permanova: n_permutations must be > 0".into(),
67        ));
68    }
69
70    let unique_groups: std::collections::HashSet<usize> = groups.iter().copied().collect();
71    let k = unique_groups.len();
72    if k < 2 {
73        return Err(CyaneaError::InvalidInput(
74            "permanova: at least 2 groups required".into(),
75        ));
76    }
77
78    let observed_f = compute_pseudo_f(distances, groups, n, k);
79
80    // Permutation test
81    let mut rng = Xorshift64::new(seed);
82    let mut perm_groups: Vec<usize> = groups.to_vec();
83    let mut n_extreme = 0usize;
84
85    for _ in 0..n_permutations {
86        fisher_yates_shuffle_usize(&mut perm_groups, &mut rng);
87        let perm_f = compute_pseudo_f(distances, &perm_groups, n, k);
88        if perm_f >= observed_f {
89            n_extreme += 1;
90        }
91    }
92
93    let p_value = (n_extreme as f64 + 1.0) / (n_permutations as f64 + 1.0);
94
95    // R² = SS_among / SS_total
96    let ss_total = compute_ss_total(distances, n);
97    let ss_within = compute_ss_within(distances, groups, n);
98    let ss_among = ss_total - ss_within;
99    let r_squared = if ss_total > 0.0 {
100        ss_among / ss_total
101    } else {
102        0.0
103    };
104
105    Ok(PermanovaResult {
106        f_statistic: observed_f,
107        p_value,
108        n_permutations,
109        r_squared,
110        n_groups: k,
111        method: "PERMANOVA".to_string(),
112    })
113}
114
115fn compute_pseudo_f(distances: &[Vec<f64>], groups: &[usize], n: usize, k: usize) -> f64 {
116    let ss_total = compute_ss_total(distances, n);
117    let ss_within = compute_ss_within(distances, groups, n);
118    let ss_among = ss_total - ss_within;
119
120    let df_among = (k - 1) as f64;
121    let df_within = (n - k) as f64;
122
123    if df_within <= 0.0 || ss_within <= 0.0 {
124        return 0.0;
125    }
126
127    (ss_among / df_among) / (ss_within / df_within)
128}
129
130fn compute_ss_total(distances: &[Vec<f64>], n: usize) -> f64 {
131    let mut ss = 0.0;
132    for i in 0..n {
133        for j in (i + 1)..n {
134            ss += distances[i][j] * distances[i][j];
135        }
136    }
137    ss / n as f64
138}
139
140fn compute_ss_within(distances: &[Vec<f64>], groups: &[usize], _n: usize) -> f64 {
141    // Group samples by label
142    let mut group_map: std::collections::HashMap<usize, Vec<usize>> =
143        std::collections::HashMap::new();
144    for (i, &g) in groups.iter().enumerate() {
145        group_map.entry(g).or_default().push(i);
146    }
147
148    let mut ss_within = 0.0;
149    for members in group_map.values() {
150        let ng = members.len();
151        if ng < 2 {
152            continue;
153        }
154        let mut ss_g = 0.0;
155        for ii in 0..ng {
156            for jj in (ii + 1)..ng {
157                let d = distances[members[ii]][members[jj]];
158                ss_g += d * d;
159            }
160        }
161        ss_within += ss_g / ng as f64;
162    }
163    ss_within
164}
165
166// ── ANOSIM ──────────────────────────────────────────────────────────────────
167
168/// Result of ANOSIM.
169#[derive(Debug, Clone)]
170pub struct AnosimResult {
171    /// ANOSIM R statistic (range: -1 to 1).
172    pub r_statistic: f64,
173    /// Permutation p-value.
174    pub p_value: f64,
175    /// Number of permutations performed.
176    pub n_permutations: usize,
177}
178
179/// ANOSIM: analysis of similarities.
180///
181/// Compares mean ranks of between-group distances to mean ranks of within-group
182/// distances. `R = (r_between - r_within) / (n(n-1)/4)`.
183///
184/// # Errors
185///
186/// Returns an error if the distance matrix is invalid or fewer than 2 groups.
187pub fn anosim(
188    distances: &[Vec<f64>],
189    groups: &[usize],
190    n_permutations: usize,
191    seed: u64,
192) -> Result<AnosimResult> {
193    let n = distances.len();
194    validate_distance_matrix(distances, n)?;
195    if groups.len() != n {
196        return Err(CyaneaError::InvalidInput(format!(
197            "anosim: groups length ({}) != distance matrix size ({})",
198            groups.len(),
199            n
200        )));
201    }
202    if n_permutations == 0 {
203        return Err(CyaneaError::InvalidInput(
204            "anosim: n_permutations must be > 0".into(),
205        ));
206    }
207
208    let unique_groups: std::collections::HashSet<usize> = groups.iter().copied().collect();
209    if unique_groups.len() < 2 {
210        return Err(CyaneaError::InvalidInput(
211            "anosim: at least 2 groups required".into(),
212        ));
213    }
214
215    // Rank all pairwise distances
216    let ranks = rank_distances(distances, n);
217
218    let observed_r = compute_anosim_r(&ranks, groups, n);
219
220    // Permutation test
221    let mut rng = Xorshift64::new(seed);
222    let mut perm_groups: Vec<usize> = groups.to_vec();
223    let mut n_extreme = 0usize;
224
225    for _ in 0..n_permutations {
226        fisher_yates_shuffle_usize(&mut perm_groups, &mut rng);
227        let perm_r = compute_anosim_r(&ranks, &perm_groups, n);
228        if perm_r >= observed_r {
229            n_extreme += 1;
230        }
231    }
232
233    let p_value = (n_extreme as f64 + 1.0) / (n_permutations as f64 + 1.0);
234
235    Ok(AnosimResult {
236        r_statistic: observed_r,
237        p_value,
238        n_permutations,
239    })
240}
241
242fn rank_distances(distances: &[Vec<f64>], n: usize) -> Vec<Vec<f64>> {
243    // Collect upper triangle, rank them, put back
244    let n_pairs = n * (n - 1) / 2;
245    let mut flat = Vec::with_capacity(n_pairs);
246    for i in 0..n {
247        for j in (i + 1)..n {
248            flat.push(distances[i][j]);
249        }
250    }
251    let ranked = rank(&flat, RankMethod::Average);
252
253    let mut result = vec![vec![0.0; n]; n];
254    let mut idx = 0;
255    for i in 0..n {
256        for j in (i + 1)..n {
257            result[i][j] = ranked[idx];
258            result[j][i] = ranked[idx];
259            idx += 1;
260        }
261    }
262    result
263}
264
265fn compute_anosim_r(ranks: &[Vec<f64>], groups: &[usize], n: usize) -> f64 {
266    let mut r_between_sum = 0.0;
267    let mut r_within_sum = 0.0;
268    let mut n_between = 0usize;
269    let mut n_within = 0usize;
270
271    for i in 0..n {
272        for j in (i + 1)..n {
273            if groups[i] == groups[j] {
274                r_within_sum += ranks[i][j];
275                n_within += 1;
276            } else {
277                r_between_sum += ranks[i][j];
278                n_between += 1;
279            }
280        }
281    }
282
283    let r_between = if n_between > 0 {
284        r_between_sum / n_between as f64
285    } else {
286        0.0
287    };
288    let r_within = if n_within > 0 {
289        r_within_sum / n_within as f64
290    } else {
291        0.0
292    };
293
294    let m = (n * (n - 1)) as f64 / 4.0;
295    if m == 0.0 {
296        return 0.0;
297    }
298
299    (r_between - r_within) / m
300}
301
302// ── Mantel test ─────────────────────────────────────────────────────────────
303
304/// Result of Mantel test.
305#[derive(Debug, Clone)]
306pub struct MantelResult {
307    /// Correlation statistic (Pearson or Spearman).
308    pub statistic: f64,
309    /// Permutation p-value.
310    pub p_value: f64,
311    /// Number of permutations performed.
312    pub n_permutations: usize,
313    /// Method used ("pearson" or "spearman").
314    pub method: String,
315}
316
317/// Mantel test: correlation between two distance matrices.
318///
319/// Flattens the upper triangles of both matrices, computes the correlation
320/// (Pearson or Spearman), and tests significance by permuting rows/columns
321/// of one matrix.
322///
323/// # Arguments
324///
325/// * `matrix_a`, `matrix_b` — symmetric distance matrices of the same size
326/// * `n_permutations` — number of permutations
327/// * `seed` — random seed
328/// * `method` — "pearson" or "spearman"
329///
330/// # Errors
331///
332/// Returns an error if matrices have different sizes or are not square.
333pub fn mantel_test(
334    matrix_a: &[Vec<f64>],
335    matrix_b: &[Vec<f64>],
336    n_permutations: usize,
337    seed: u64,
338    method: &str,
339) -> Result<MantelResult> {
340    let n = matrix_a.len();
341    validate_distance_matrix(matrix_a, n)?;
342    validate_distance_matrix(matrix_b, n)?;
343    if matrix_b.len() != n {
344        return Err(CyaneaError::InvalidInput(format!(
345            "mantel: matrices must have same size ({} vs {})",
346            n,
347            matrix_b.len()
348        )));
349    }
350    if n_permutations == 0 {
351        return Err(CyaneaError::InvalidInput(
352            "mantel: n_permutations must be > 0".into(),
353        ));
354    }
355    if method != "pearson" && method != "spearman" {
356        return Err(CyaneaError::InvalidInput(format!(
357            "mantel: method must be 'pearson' or 'spearman', got '{}'",
358            method
359        )));
360    }
361
362    // Flatten upper triangles
363    let flat_a = upper_triangle_vec(matrix_a, n);
364    let flat_b = upper_triangle_vec(matrix_b, n);
365
366    let observed = if method == "pearson" {
367        correlation::pearson(&flat_a, &flat_b)?
368    } else {
369        correlation::spearman(&flat_a, &flat_b)?
370    };
371
372    // Permute rows/columns of matrix_a
373    let mut rng = Xorshift64::new(seed);
374    let mut perm_indices: Vec<usize> = (0..n).collect();
375    let mut n_extreme = 0usize;
376
377    for _ in 0..n_permutations {
378        fisher_yates_shuffle_usize(&mut perm_indices, &mut rng);
379        let perm_flat: Vec<f64> = {
380            let mut v = Vec::with_capacity(flat_a.len());
381            for i in 0..n {
382                for j in (i + 1)..n {
383                    v.push(matrix_a[perm_indices[i]][perm_indices[j]]);
384                }
385            }
386            v
387        };
388        let perm_stat = if method == "pearson" {
389            correlation::pearson(&perm_flat, &flat_b)?
390        } else {
391            correlation::spearman(&perm_flat, &flat_b)?
392        };
393        if perm_stat >= observed {
394            n_extreme += 1;
395        }
396    }
397
398    let p_value = (n_extreme as f64 + 1.0) / (n_permutations as f64 + 1.0);
399
400    Ok(MantelResult {
401        statistic: observed,
402        p_value,
403        n_permutations,
404        method: method.to_string(),
405    })
406}
407
408// ── AMOVA ───────────────────────────────────────────────────────────────────
409
410/// Result of AMOVA.
411#[derive(Debug, Clone)]
412pub struct AmovaResult {
413    /// Sum of squared deviations among groups.
414    pub ss_among: f64,
415    /// Sum of squared deviations within groups.
416    pub ss_within: f64,
417    /// Total sum of squared deviations.
418    pub ss_total: f64,
419    /// Degrees of freedom (among, within, total).
420    pub df: (usize, usize, usize),
421    /// Mean squares (among, within).
422    pub ms: (f64, f64),
423    /// F-statistic.
424    pub f_statistic: f64,
425    /// Permutation p-value.
426    pub p_value: f64,
427    /// Phi-statistic (proportion of variance among groups).
428    pub phi_statistic: f64,
429    /// Number of permutations performed.
430    pub n_permutations: usize,
431}
432
433/// AMOVA: analysis of molecular variance.
434///
435/// Partitions squared-distance variance into among- and within-group components.
436/// Tests significance by permuting group labels.
437///
438/// # Errors
439///
440/// Returns an error if the distance matrix is invalid or fewer than 2 groups.
441pub fn amova(
442    distances: &[Vec<f64>],
443    groups: &[usize],
444    n_permutations: usize,
445    seed: u64,
446) -> Result<AmovaResult> {
447    let n = distances.len();
448    validate_distance_matrix(distances, n)?;
449    if groups.len() != n {
450        return Err(CyaneaError::InvalidInput(format!(
451            "amova: groups length ({}) != distance matrix size ({})",
452            groups.len(),
453            n
454        )));
455    }
456    if n_permutations == 0 {
457        return Err(CyaneaError::InvalidInput(
458            "amova: n_permutations must be > 0".into(),
459        ));
460    }
461
462    let unique_groups: std::collections::HashSet<usize> = groups.iter().copied().collect();
463    let k = unique_groups.len();
464    if k < 2 {
465        return Err(CyaneaError::InvalidInput(
466            "amova: at least 2 groups required".into(),
467        ));
468    }
469
470    let (ss_among, ss_within, ss_total) = compute_amova_ss(distances, groups, n);
471
472    let df_among = k - 1;
473    let df_within = n - k;
474    let df_total = n - 1;
475
476    let ms_among = if df_among > 0 {
477        ss_among / df_among as f64
478    } else {
479        0.0
480    };
481    let ms_within = if df_within > 0 {
482        ss_within / df_within as f64
483    } else {
484        0.0
485    };
486
487    let f_statistic = if ms_within > 0.0 {
488        ms_among / ms_within
489    } else {
490        0.0
491    };
492
493    // Phi_ST = sigma²_among / sigma²_total
494    // sigma²_among = (MS_among - MS_within) / n_0  (n_0 is avg group size)
495    let group_sizes: Vec<usize> = {
496        let mut map: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
497        for &g in groups {
498            *map.entry(g).or_insert(0) += 1;
499        }
500        map.values().copied().collect()
501    };
502    let n0 = {
503        let sum_n: usize = group_sizes.iter().sum();
504        let sum_n2: usize = group_sizes.iter().map(|&ng| ng * ng).sum();
505        if k > 1 {
506            (sum_n as f64 - sum_n2 as f64 / sum_n as f64) / (k - 1) as f64
507        } else {
508            sum_n as f64
509        }
510    };
511
512    let sigma2_within = ms_within;
513    let sigma2_among = if n0 > 0.0 {
514        ((ms_among - ms_within) / n0).max(0.0)
515    } else {
516        0.0
517    };
518    let sigma2_total = sigma2_among + sigma2_within;
519    let phi_statistic = if sigma2_total > 0.0 {
520        sigma2_among / sigma2_total
521    } else {
522        0.0
523    };
524
525    // Permutation test
526    let mut rng = Xorshift64::new(seed);
527    let mut perm_groups: Vec<usize> = groups.to_vec();
528    let mut n_extreme = 0usize;
529
530    for _ in 0..n_permutations {
531        fisher_yates_shuffle_usize(&mut perm_groups, &mut rng);
532        let (perm_ss_among, perm_ss_within, _) =
533            compute_amova_ss(distances, &perm_groups, n);
534        let perm_ms_among = if df_among > 0 {
535            perm_ss_among / df_among as f64
536        } else {
537            0.0
538        };
539        let perm_ms_within = if df_within > 0 {
540            perm_ss_within / df_within as f64
541        } else {
542            0.0
543        };
544        let perm_f = if perm_ms_within > 0.0 {
545            perm_ms_among / perm_ms_within
546        } else {
547            0.0
548        };
549        if perm_f >= f_statistic {
550            n_extreme += 1;
551        }
552    }
553
554    let p_value = (n_extreme as f64 + 1.0) / (n_permutations as f64 + 1.0);
555
556    Ok(AmovaResult {
557        ss_among,
558        ss_within,
559        ss_total,
560        df: (df_among, df_within, df_total),
561        ms: (ms_among, ms_within),
562        f_statistic,
563        p_value,
564        phi_statistic,
565        n_permutations,
566    })
567}
568
569fn compute_amova_ss(
570    distances: &[Vec<f64>],
571    groups: &[usize],
572    n: usize,
573) -> (f64, f64, f64) {
574    // SSD_total = (1/n) Σ_{i<j} d²_{ij}
575    let mut ssd_total = 0.0;
576    for i in 0..n {
577        for j in (i + 1)..n {
578            ssd_total += distances[i][j] * distances[i][j];
579        }
580    }
581    let ss_total = ssd_total / n as f64;
582
583    // SSD_within = Σ_g (1/n_g) Σ_{i<j in g} d²_{ij}
584    let mut group_map: std::collections::HashMap<usize, Vec<usize>> =
585        std::collections::HashMap::new();
586    for (i, &g) in groups.iter().enumerate() {
587        group_map.entry(g).or_default().push(i);
588    }
589
590    let mut ss_within = 0.0;
591    for members in group_map.values() {
592        let ng = members.len();
593        if ng < 2 {
594            continue;
595        }
596        let mut ss_g = 0.0;
597        for ii in 0..ng {
598            for jj in (ii + 1)..ng {
599                let d = distances[members[ii]][members[jj]];
600                ss_g += d * d;
601            }
602        }
603        ss_within += ss_g / ng as f64;
604    }
605
606    let ss_among = ss_total - ss_within;
607    (ss_among, ss_within, ss_total)
608}
609
610// ── BIOENV ──────────────────────────────────────────────────────────────────
611
612/// Result of BIOENV analysis.
613#[derive(Debug, Clone)]
614pub struct BioenvResult {
615    /// Indices of the best subset of environmental variables (0-based).
616    pub best_variables: Vec<usize>,
617    /// Spearman correlation for the best subset.
618    pub best_correlation: f64,
619    /// All subsets tested with their correlations: `(variable indices, correlation)`.
620    pub all_results: Vec<(Vec<usize>, f64)>,
621}
622
623/// BIOENV: find the best subset of environmental variables explaining
624/// community distance structure.
625///
626/// For each subset of environmental variables (sizes 1 to `max_vars`), computes
627/// a Euclidean distance matrix and correlates it with the community distance
628/// matrix using Spearman correlation. Returns the best-correlated subset.
629///
630/// # Arguments
631///
632/// * `community_distances` — pairwise community distance matrix (n × n)
633/// * `env_variables` — flat row-major environmental data (n_samples × n_vars)
634/// * `n_samples` — number of samples
635/// * `n_vars` — number of environmental variables
636/// * `max_vars` — maximum subset size to test (capped at `n_vars`)
637///
638/// # Errors
639///
640/// Returns an error if dimensions are inconsistent.
641pub fn bioenv(
642    community_distances: &[Vec<f64>],
643    env_variables: &[f64],
644    n_samples: usize,
645    n_vars: usize,
646    max_vars: usize,
647) -> Result<BioenvResult> {
648    let n = community_distances.len();
649    validate_distance_matrix(community_distances, n)?;
650    if n != n_samples {
651        return Err(CyaneaError::InvalidInput(format!(
652            "bioenv: community_distances size ({}) != n_samples ({})",
653            n, n_samples
654        )));
655    }
656    if env_variables.len() != n_samples * n_vars {
657        return Err(CyaneaError::InvalidInput(format!(
658            "bioenv: env_variables length ({}) != n_samples ({}) * n_vars ({})",
659            env_variables.len(),
660            n_samples,
661            n_vars
662        )));
663    }
664    if n_vars == 0 {
665        return Err(CyaneaError::InvalidInput(
666            "bioenv: n_vars must be > 0".into(),
667        ));
668    }
669
670    let max_k = max_vars.min(n_vars);
671
672    // Flatten upper triangle of community distances for Spearman
673    let comm_flat = upper_triangle_vec(community_distances, n);
674
675    let mut best_corr = f64::NEG_INFINITY;
676    let mut best_vars = Vec::new();
677    let mut all_results = Vec::new();
678
679    // Enumerate all subsets of size 1..=max_k
680    for size in 1..=max_k {
681        let subsets = combinations(n_vars, size);
682        for subset in subsets {
683            // Compute Euclidean distance matrix for this subset
684            let env_dists = euclidean_distance_subset(env_variables, n_samples, n_vars, &subset);
685            let env_flat = upper_triangle_vec_owned(&env_dists, n);
686
687            // Spearman correlation
688            let rho = correlation::spearman(&comm_flat, &env_flat).unwrap_or(0.0);
689
690            all_results.push((subset.clone(), rho));
691            if rho > best_corr {
692                best_corr = rho;
693                best_vars = subset;
694            }
695        }
696    }
697
698    Ok(BioenvResult {
699        best_variables: best_vars,
700        best_correlation: best_corr,
701        all_results,
702    })
703}
704
705fn euclidean_distance_subset(
706    env: &[f64],
707    n: usize,
708    n_vars: usize,
709    vars: &[usize],
710) -> Vec<Vec<f64>> {
711    let mut dists = vec![vec![0.0; n]; n];
712    for i in 0..n {
713        for j in (i + 1)..n {
714            let mut d2 = 0.0;
715            for &v in vars {
716                let diff = env[i * n_vars + v] - env[j * n_vars + v];
717                d2 += diff * diff;
718            }
719            let d = d2.sqrt();
720            dists[i][j] = d;
721            dists[j][i] = d;
722        }
723    }
724    dists
725}
726
727fn combinations(n: usize, k: usize) -> Vec<Vec<usize>> {
728    let mut result = Vec::new();
729    let mut current = Vec::with_capacity(k);
730    fn helper(
731        start: usize,
732        n: usize,
733        k: usize,
734        current: &mut Vec<usize>,
735        result: &mut Vec<Vec<usize>>,
736    ) {
737        if current.len() == k {
738            result.push(current.clone());
739            return;
740        }
741        for i in start..n {
742            current.push(i);
743            helper(i + 1, n, k, current, result);
744            current.pop();
745        }
746    }
747    helper(0, n, k, &mut current, &mut result);
748    result
749}
750
751// ── Common helpers ──────────────────────────────────────────────────────────
752
753struct Xorshift64 {
754    state: u64,
755}
756
757impl Xorshift64 {
758    fn new(seed: u64) -> Self {
759        Self {
760            state: if seed == 0 { 1 } else { seed },
761        }
762    }
763
764    fn next_u64(&mut self) -> u64 {
765        let mut x = self.state;
766        x ^= x << 13;
767        x ^= x >> 7;
768        x ^= x << 17;
769        self.state = x;
770        x
771    }
772
773    fn next_usize(&mut self, n: usize) -> usize {
774        (self.next_u64() % n as u64) as usize
775    }
776}
777
778fn fisher_yates_shuffle_usize(slice: &mut [usize], rng: &mut Xorshift64) {
779    let n = slice.len();
780    for i in (1..n).rev() {
781        let j = rng.next_usize(i + 1);
782        slice.swap(i, j);
783    }
784}
785
786fn validate_distance_matrix(distances: &[Vec<f64>], n: usize) -> Result<()> {
787    if n < 2 {
788        return Err(CyaneaError::InvalidInput(
789            "distance matrix must have at least 2 samples".into(),
790        ));
791    }
792    for row in distances {
793        if row.len() != n {
794            return Err(CyaneaError::InvalidInput(
795                "distance matrix must be square".into(),
796            ));
797        }
798    }
799    Ok(())
800}
801
802fn upper_triangle_vec(matrix: &[Vec<f64>], n: usize) -> Vec<f64> {
803    let mut v = Vec::with_capacity(n * (n - 1) / 2);
804    for i in 0..n {
805        for j in (i + 1)..n {
806            v.push(matrix[i][j]);
807        }
808    }
809    v
810}
811
812fn upper_triangle_vec_owned(matrix: &[Vec<f64>], n: usize) -> Vec<f64> {
813    upper_triangle_vec(matrix, n)
814}
815
816#[cfg(test)]
817mod tests {
818    use super::*;
819
820    fn make_grouped_distances() -> (Vec<Vec<f64>>, Vec<usize>) {
821        // Two well-separated groups: {0..5} and {6..11}
822        // Within-group distances small, between-group distances large
823        let n = 12;
824        let mut dists = vec![vec![0.0; n]; n];
825        // Within group 0: small distances
826        for i in 0..6 {
827            for j in (i + 1)..6 {
828                dists[i][j] = 0.1;
829                dists[j][i] = 0.1;
830            }
831        }
832        // Within group 1: small distances
833        for i in 6..12 {
834            for j in (i + 1)..12 {
835                dists[i][j] = 0.1;
836                dists[j][i] = 0.1;
837            }
838        }
839        // Between groups: large distances
840        for i in 0..6 {
841            for j in 6..12 {
842                dists[i][j] = 5.0;
843                dists[j][i] = 5.0;
844            }
845        }
846        let groups = vec![0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1];
847        (dists, groups)
848    }
849
850    fn make_uniform_distances() -> (Vec<Vec<f64>>, Vec<usize>) {
851        // All pairwise distances equal → no group structure
852        let n = 12;
853        let mut dists = vec![vec![0.0; n]; n];
854        for i in 0..n {
855            for j in (i + 1)..n {
856                dists[i][j] = 1.0;
857                dists[j][i] = 1.0;
858            }
859        }
860        let groups = vec![0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1];
861        (dists, groups)
862    }
863
864    // ── PERMANOVA tests ──────────────────────────────────────────────
865
866    #[test]
867    fn permanova_separated_groups_significant() {
868        let (dists, groups) = make_grouped_distances();
869        let result = permanova(&dists, &groups, 999, 42).unwrap();
870        assert!(result.f_statistic > 1.0, "F={}", result.f_statistic);
871        assert!(result.p_value < 0.05, "p={}", result.p_value);
872        assert!(result.r_squared > 0.5, "R²={}", result.r_squared);
873    }
874
875    #[test]
876    fn permanova_uniform_not_significant() {
877        let (dists, groups) = make_uniform_distances();
878        let result = permanova(&dists, &groups, 999, 42).unwrap();
879        assert!(result.p_value > 0.05, "p={}", result.p_value);
880    }
881
882    #[test]
883    fn permanova_r_squared_in_range() {
884        let (dists, groups) = make_grouped_distances();
885        let result = permanova(&dists, &groups, 99, 42).unwrap();
886        assert!(result.r_squared >= 0.0 && result.r_squared <= 1.0);
887    }
888
889    #[test]
890    fn permanova_too_few_groups_error() {
891        let dists = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
892        let groups = vec![0, 0]; // Only 1 group
893        assert!(permanova(&dists, &groups, 99, 42).is_err());
894    }
895
896    #[test]
897    fn permanova_mismatched_error() {
898        let dists = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
899        let groups = vec![0, 1, 2]; // Wrong length
900        assert!(permanova(&dists, &groups, 99, 42).is_err());
901    }
902
903    // ── ANOSIM tests ─────────────────────────────────────────────────
904
905    #[test]
906    fn anosim_separated_groups() {
907        let (dists, groups) = make_grouped_distances();
908        let result = anosim(&dists, &groups, 999, 42).unwrap();
909        assert!(result.r_statistic > 0.0, "R={}", result.r_statistic);
910        assert!(result.p_value < 0.05, "p={}", result.p_value);
911    }
912
913    #[test]
914    fn anosim_r_in_valid_range() {
915        let (dists, groups) = make_grouped_distances();
916        let result = anosim(&dists, &groups, 99, 42).unwrap();
917        assert!(
918            result.r_statistic >= -1.0 && result.r_statistic <= 1.0,
919            "R={}",
920            result.r_statistic
921        );
922    }
923
924    #[test]
925    fn anosim_uniform_not_significant() {
926        let (dists, groups) = make_uniform_distances();
927        let result = anosim(&dists, &groups, 999, 42).unwrap();
928        // R should be near 0 (no group structure in uniform distances)
929        assert!(result.r_statistic.abs() < 0.5, "R={}", result.r_statistic);
930    }
931
932    #[test]
933    fn anosim_too_few_groups_error() {
934        let dists = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
935        assert!(anosim(&dists, &[0, 0], 99, 42).is_err());
936    }
937
938    // ── Mantel test ──────────────────────────────────────────────────
939
940    #[test]
941    fn mantel_identical_matrices_correlation_one() {
942        let mat = vec![
943            vec![0.0, 1.0, 2.0, 3.0],
944            vec![1.0, 0.0, 1.5, 2.5],
945            vec![2.0, 1.5, 0.0, 1.0],
946            vec![3.0, 2.5, 1.0, 0.0],
947        ];
948        let result = mantel_test(&mat, &mat, 99, 42, "pearson").unwrap();
949        assert!(
950            (result.statistic - 1.0).abs() < 1e-10,
951            "r={}",
952            result.statistic
953        );
954    }
955
956    #[test]
957    fn mantel_pearson_vs_spearman() {
958        let mat_a = vec![
959            vec![0.0, 1.0, 3.0],
960            vec![1.0, 0.0, 2.0],
961            vec![3.0, 2.0, 0.0],
962        ];
963        let mat_b = vec![
964            vec![0.0, 1.0, 3.0],
965            vec![1.0, 0.0, 2.0],
966            vec![3.0, 2.0, 0.0],
967        ];
968        let r_p = mantel_test(&mat_a, &mat_b, 99, 42, "pearson").unwrap();
969        let r_s = mantel_test(&mat_a, &mat_b, 99, 42, "spearman").unwrap();
970        // Both should be high for identical matrices
971        assert!(r_p.statistic > 0.9);
972        assert!(r_s.statistic > 0.9);
973    }
974
975    #[test]
976    fn mantel_invalid_method_error() {
977        let mat = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
978        assert!(mantel_test(&mat, &mat, 99, 42, "invalid").is_err());
979    }
980
981    #[test]
982    fn mantel_size_mismatch_error() {
983        let a = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
984        let b = vec![vec![0.0, 1.0, 2.0], vec![1.0, 0.0, 1.5], vec![2.0, 1.5, 0.0]];
985        assert!(mantel_test(&a, &b, 99, 42, "pearson").is_err());
986    }
987
988    // ── AMOVA tests ──────────────────────────────────────────────────
989
990    #[test]
991    fn amova_ss_additivity() {
992        let (dists, groups) = make_grouped_distances();
993        let result = amova(&dists, &groups, 99, 42).unwrap();
994        let ss_sum = result.ss_among + result.ss_within;
995        assert!(
996            (ss_sum - result.ss_total).abs() < 1e-10,
997            "SS_among + SS_within = {} != SS_total = {}",
998            ss_sum,
999            result.ss_total
1000        );
1001    }
1002
1003    #[test]
1004    fn amova_separated_groups_significant() {
1005        let (dists, groups) = make_grouped_distances();
1006        let result = amova(&dists, &groups, 999, 42).unwrap();
1007        assert!(result.f_statistic > 1.0, "F={}", result.f_statistic);
1008        assert!(result.p_value < 0.05, "p={}", result.p_value);
1009        assert!(result.phi_statistic > 0.0, "Phi={}", result.phi_statistic);
1010    }
1011
1012    #[test]
1013    fn amova_phi_in_range() {
1014        let (dists, groups) = make_grouped_distances();
1015        let result = amova(&dists, &groups, 99, 42).unwrap();
1016        assert!(
1017            result.phi_statistic >= 0.0 && result.phi_statistic <= 1.0,
1018            "Phi={}",
1019            result.phi_statistic
1020        );
1021    }
1022
1023    #[test]
1024    fn amova_too_few_groups_error() {
1025        let dists = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
1026        assert!(amova(&dists, &[0, 0], 99, 42).is_err());
1027    }
1028
1029    // ── BIOENV tests ─────────────────────────────────────────────────
1030
1031    #[test]
1032    fn bioenv_finds_correlated_variable() {
1033        // Community distances correlated with env variable 0
1034        let comm_dists = vec![
1035            vec![0.0, 1.0, 2.0, 3.0],
1036            vec![1.0, 0.0, 1.0, 2.0],
1037            vec![2.0, 1.0, 0.0, 1.0],
1038            vec![3.0, 2.0, 1.0, 0.0],
1039        ];
1040        // Env var 0: gradient matching community; var 1: noise
1041        let env = vec![
1042            0.0, 5.0, // sample 0
1043            1.0, 3.0, // sample 1
1044            2.0, 7.0, // sample 2
1045            3.0, 1.0, // sample 3
1046        ];
1047        let result = bioenv(&comm_dists, &env, 4, 2, 2).unwrap();
1048        // Best subset should include variable 0
1049        assert!(
1050            result.best_variables.contains(&0),
1051            "best_vars={:?}",
1052            result.best_variables
1053        );
1054        assert!(result.best_correlation > 0.0);
1055    }
1056
1057    #[test]
1058    fn bioenv_single_variable() {
1059        let comm_dists = vec![
1060            vec![0.0, 1.0, 2.0],
1061            vec![1.0, 0.0, 1.0],
1062            vec![2.0, 1.0, 0.0],
1063        ];
1064        let env = vec![0.0, 1.0, 2.0];
1065        let result = bioenv(&comm_dists, &env, 3, 1, 1).unwrap();
1066        assert_eq!(result.best_variables, vec![0]);
1067        assert_eq!(result.all_results.len(), 1);
1068    }
1069
1070    #[test]
1071    fn bioenv_dimension_mismatch_error() {
1072        let comm_dists = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
1073        let env = vec![1.0]; // Wrong size
1074        assert!(bioenv(&comm_dists, &env, 2, 1, 1).is_err());
1075    }
1076
1077    #[test]
1078    fn bioenv_zero_vars_error() {
1079        let comm_dists = vec![vec![0.0, 1.0], vec![1.0, 0.0]];
1080        assert!(bioenv(&comm_dists, &[], 2, 0, 1).is_err());
1081    }
1082}