1use std::collections::{BTreeMap, HashSet};
16
17use cyanea_core::{CyaneaError, Result};
18
19use crate::correction;
20use crate::testing::ln_choose;
21
22#[derive(Debug, Clone)]
26pub struct GeneSet {
27 pub name: String,
29 pub genes: Vec<usize>,
31}
32
33#[derive(Debug, Clone)]
37pub struct OraResult {
38 pub gene_set: String,
40 pub overlap: usize,
42 pub expected: f64,
44 pub gene_set_size: usize,
46 pub p_value: f64,
48 pub p_adjusted: f64,
50}
51
52pub fn ora(
79 significant: &[usize],
80 gene_sets: &[GeneSet],
81 n_total: usize,
82) -> Result<Vec<OraResult>> {
83 if n_total == 0 {
85 return Err(CyaneaError::InvalidInput(
86 "ora: n_total must be > 0".into(),
87 ));
88 }
89 if gene_sets.is_empty() {
90 return Err(CyaneaError::InvalidInput(
91 "ora: gene_sets must be non-empty".into(),
92 ));
93 }
94 for &idx in significant {
95 if idx >= n_total {
96 return Err(CyaneaError::InvalidInput(format!(
97 "ora: significant gene index {} >= n_total {}",
98 idx, n_total,
99 )));
100 }
101 }
102 for gs in gene_sets {
103 for &idx in &gs.genes {
104 if idx >= n_total {
105 return Err(CyaneaError::InvalidInput(format!(
106 "ora: gene set '{}' contains index {} >= n_total {}",
107 gs.name, idx, n_total,
108 )));
109 }
110 }
111 }
112
113 let sig_set: HashSet<usize> = significant.iter().copied().collect();
114 let n = sig_set.len(); let mut results = Vec::with_capacity(gene_sets.len());
117
118 for gs in gene_sets {
119 let gs_unique: HashSet<usize> = gs.genes.iter().copied().collect();
121 let big_k = gs_unique.len(); let k = sig_set.intersection(&gs_unique).count();
125
126 let expected = n as f64 * big_k as f64 / n_total as f64;
128
129 let p_value = hypergeometric_upper_tail(k, n, big_k, n_total);
131
132 results.push(OraResult {
133 gene_set: gs.name.clone(),
134 overlap: k,
135 expected,
136 gene_set_size: big_k,
137 p_value,
138 p_adjusted: 1.0, });
140 }
141
142 let raw_p: Vec<f64> = results.iter().map(|r| r.p_value).collect();
144 let adj_p = correction::benjamini_hochberg(&raw_p)?;
145 for (r, &padj) in results.iter_mut().zip(adj_p.iter()) {
146 r.p_adjusted = padj;
147 }
148
149 results.sort_by(|a, b| a.p_value.total_cmp(&b.p_value));
151
152 Ok(results)
153}
154
155fn hypergeometric_upper_tail(k: usize, n: usize, big_k: usize, big_n: usize) -> f64 {
163 if k == 0 {
164 return 1.0;
165 }
166 let max_i = n.min(big_k);
167 if k > max_i {
168 return 0.0;
169 }
170
171 let mut sum = 0.0_f64;
173 let log_denom = ln_choose(big_n, n);
174 for i in k..=max_i {
175 if n < i || big_n - big_k < n - i {
177 continue;
178 }
179 let log_p = ln_choose(big_k, i) + ln_choose(big_n - big_k, n - i) - log_denom;
180 sum += log_p.exp();
181 }
182 sum.min(1.0)
183}
184
185#[derive(Debug, Clone)]
189pub struct GseaResult {
190 pub gene_set: String,
192 pub enrichment_score: f64,
194 pub normalized_es: f64,
196 pub p_value: f64,
198 pub p_adjusted: f64,
200 pub leading_edge_size: usize,
202 pub gene_set_size: usize,
204}
205
206pub fn gsea_preranked(
240 genes: &[usize],
241 scores: &[f64],
242 gene_sets: &[GeneSet],
243 weight: f64,
244 n_permutations: usize,
245) -> Result<Vec<GseaResult>> {
246 if genes.is_empty() {
248 return Err(CyaneaError::InvalidInput(
249 "gsea_preranked: genes must be non-empty".into(),
250 ));
251 }
252 if genes.len() != scores.len() {
253 return Err(CyaneaError::InvalidInput(format!(
254 "gsea_preranked: genes length ({}) != scores length ({})",
255 genes.len(),
256 scores.len(),
257 )));
258 }
259 if gene_sets.is_empty() {
260 return Err(CyaneaError::InvalidInput(
261 "gsea_preranked: gene_sets must be non-empty".into(),
262 ));
263 }
264 if n_permutations == 0 {
265 return Err(CyaneaError::InvalidInput(
266 "gsea_preranked: n_permutations must be > 0".into(),
267 ));
268 }
269
270 let gene_set_in_list: HashSet<usize> = genes.iter().copied().collect();
271
272 let mut results = Vec::with_capacity(gene_sets.len());
273
274 for gs in gene_sets {
275 let set_members: HashSet<usize> = gs.genes.iter().copied()
277 .filter(|g| gene_set_in_list.contains(g))
278 .collect();
279 let n_h = set_members.len();
280
281 if n_h == 0 {
282 results.push(GseaResult {
284 gene_set: gs.name.clone(),
285 enrichment_score: 0.0,
286 normalized_es: 0.0,
287 p_value: 1.0,
288 p_adjusted: 1.0,
289 leading_edge_size: 0,
290 gene_set_size: 0,
291 });
292 continue;
293 }
294
295 let (es, leading_edge) = compute_es(genes, scores, &set_members, n_h, weight);
297
298 let mut rng = Xorshift64::new(42);
300 let mut null_es = Vec::with_capacity(n_permutations);
301
302 let mut perm_scores: Vec<f64> = scores.to_vec();
304 for _ in 0..n_permutations {
305 fisher_yates_shuffle(&mut perm_scores, &mut rng);
306 let (perm_es, _) = compute_es(genes, &perm_scores, &set_members, n_h, weight);
307 null_es.push(perm_es);
308 }
309
310 let p_value = if es >= 0.0 {
313 let count = null_es.iter().filter(|&&x| x >= es).count();
314 (count as f64 / n_permutations as f64).max(1.0 / n_permutations as f64)
315 } else {
316 let count = null_es.iter().filter(|&&x| x <= es).count();
317 (count as f64 / n_permutations as f64).max(1.0 / n_permutations as f64)
318 };
319
320 let normalized_es = if es >= 0.0 {
322 let pos_null: Vec<f64> = null_es.iter().copied().filter(|&x| x >= 0.0).collect();
323 if pos_null.is_empty() {
324 0.0
325 } else {
326 let mean_pos: f64 = pos_null.iter().sum::<f64>() / pos_null.len() as f64;
327 if mean_pos > 1e-15 { es / mean_pos } else { 0.0 }
328 }
329 } else {
330 let neg_null: Vec<f64> = null_es.iter().copied().filter(|&x| x < 0.0).collect();
331 if neg_null.is_empty() {
332 0.0
333 } else {
334 let mean_neg: f64 = neg_null.iter().map(|x| x.abs()).sum::<f64>() / neg_null.len() as f64;
335 if mean_neg > 1e-15 { -(es.abs() / mean_neg) } else { 0.0 }
336 }
337 };
338
339 results.push(GseaResult {
340 gene_set: gs.name.clone(),
341 enrichment_score: es,
342 normalized_es,
343 p_value,
344 p_adjusted: 1.0, leading_edge_size: leading_edge,
346 gene_set_size: n_h,
347 });
348 }
349
350 let raw_p: Vec<f64> = results.iter().map(|r| r.p_value).collect();
352 let adj_p = correction::benjamini_hochberg(&raw_p)?;
353 for (r, &padj) in results.iter_mut().zip(adj_p.iter()) {
354 r.p_adjusted = padj;
355 }
356
357 results.sort_by(|a, b| a.p_value.total_cmp(&b.p_value));
359
360 Ok(results)
361}
362
363fn compute_es(
367 genes: &[usize],
368 scores: &[f64],
369 set_members: &HashSet<usize>,
370 n_h: usize,
371 weight: f64,
372) -> (f64, usize) {
373 let n = genes.len();
374
375 let n_r: f64 = genes.iter().zip(scores.iter())
377 .filter(|(&g, _)| set_members.contains(&g))
378 .map(|(_, &s)| s.abs().powf(weight))
379 .sum();
380
381 let n_miss = n - n_h;
383 let miss_penalty = if n_miss > 0 { 1.0 / n_miss as f64 } else { 0.0 };
384
385 let mut running_sum = 0.0_f64;
386 let mut max_dev = 0.0_f64;
387 let mut max_dev_pos = 0usize; let mut max_dev_signed = 0.0_f64;
389
390 for (i, (&g, &s)) in genes.iter().zip(scores.iter()).enumerate() {
391 if set_members.contains(&g) {
392 let hit_score = if n_r > 1e-15 {
393 s.abs().powf(weight) / n_r
394 } else {
395 1.0 / n_h as f64
396 };
397 running_sum += hit_score;
398 } else {
399 running_sum -= miss_penalty;
400 }
401
402 if running_sum.abs() > max_dev {
403 max_dev = running_sum.abs();
404 max_dev_signed = running_sum;
405 max_dev_pos = i;
406 }
407 }
408
409 let leading_edge = genes[..=max_dev_pos].iter()
411 .filter(|g| set_members.contains(g))
412 .count();
413
414 (max_dev_signed, leading_edge)
415}
416
417struct Xorshift64 {
421 state: u64,
422}
423
424impl Xorshift64 {
425 fn new(seed: u64) -> Self {
426 Self { state: if seed == 0 { 1 } else { seed } }
428 }
429
430 fn next_u64(&mut self) -> u64 {
431 let mut x = self.state;
432 x ^= x << 13;
433 x ^= x >> 7;
434 x ^= x << 17;
435 self.state = x;
436 x
437 }
438
439 fn next_usize(&mut self, n: usize) -> usize {
441 (self.next_u64() % n as u64) as usize
442 }
443}
444
445fn fisher_yates_shuffle(slice: &mut [f64], rng: &mut Xorshift64) {
447 let n = slice.len();
448 for i in (1..n).rev() {
449 let j = rng.next_usize(i + 1);
450 slice.swap(i, j);
451 }
452}
453
454#[derive(Debug, Clone, Copy, PartialEq, Eq)]
458pub enum GoNamespace {
459 BiologicalProcess,
461 MolecularFunction,
463 CellularComponent,
465}
466
467#[derive(Debug, Clone)]
469pub struct GoTerm {
470 pub id: String,
472 pub name: String,
474 pub namespace: GoNamespace,
476 pub genes: Vec<usize>,
478}
479
480#[derive(Debug, Clone)]
482pub struct GoAnnotation {
483 terms: Vec<GoTerm>,
484}
485
486impl GoAnnotation {
487 pub fn new(terms: Vec<GoTerm>) -> Result<Self> {
489 if terms.is_empty() {
490 return Err(CyaneaError::InvalidInput(
491 "GoAnnotation: terms must be non-empty".into(),
492 ));
493 }
494 Ok(Self { terms })
495 }
496
497 pub fn from_entries(entries: &[(usize, &str, &str, GoNamespace)]) -> Result<Self> {
502 if entries.is_empty() {
503 return Err(CyaneaError::InvalidInput(
504 "GoAnnotation::from_entries: entries must be non-empty".into(),
505 ));
506 }
507 let mut map: BTreeMap<String, (String, GoNamespace, Vec<usize>)> = BTreeMap::new();
508 for &(gene, term_id, term_name, ns) in entries {
509 map.entry(term_id.to_string())
510 .and_modify(|e| e.2.push(gene))
511 .or_insert_with(|| (term_name.to_string(), ns, vec![gene]));
512 }
513 let terms = map
514 .into_iter()
515 .map(|(id, (name, namespace, genes))| GoTerm {
516 id,
517 name,
518 namespace,
519 genes,
520 })
521 .collect();
522 Ok(Self { terms })
523 }
524
525 pub fn n_terms(&self) -> usize {
527 self.terms.len()
528 }
529
530 pub fn n_genes(&self) -> usize {
532 let all: HashSet<usize> = self.terms.iter().flat_map(|t| t.genes.iter().copied()).collect();
533 all.len()
534 }
535
536 pub fn terms_for_gene(&self, gene: usize) -> Vec<&str> {
538 self.terms
539 .iter()
540 .filter(|t| t.genes.contains(&gene))
541 .map(|t| t.id.as_str())
542 .collect()
543 }
544
545 pub fn filter_namespace(&self, ns: GoNamespace) -> Result<Self> {
547 let filtered: Vec<GoTerm> = self
548 .terms
549 .iter()
550 .filter(|t| t.namespace == ns)
551 .cloned()
552 .collect();
553 if filtered.is_empty() {
554 return Err(CyaneaError::InvalidInput(format!(
555 "GoAnnotation::filter_namespace: no terms in namespace {:?}",
556 ns,
557 )));
558 }
559 Ok(Self { terms: filtered })
560 }
561}
562
563#[derive(Debug, Clone)]
565pub struct GoEnrichmentConfig {
566 pub min_genes: usize,
568 pub max_genes: usize,
570 pub namespace: Option<GoNamespace>,
572}
573
574impl Default for GoEnrichmentConfig {
575 fn default() -> Self {
576 Self {
577 min_genes: 5,
578 max_genes: 500,
579 namespace: None,
580 }
581 }
582}
583
584#[derive(Debug, Clone)]
586pub struct GoEnrichmentResult {
587 pub term_id: String,
589 pub term_name: String,
591 pub namespace: GoNamespace,
593 pub overlap: usize,
595 pub expected: f64,
597 pub gene_set_size: usize,
599 pub p_value: f64,
601 pub p_adjusted: f64,
603}
604
605pub fn go_enrichment(
639 significant: &[usize],
640 annotation: &GoAnnotation,
641 n_total: usize,
642 config: &GoEnrichmentConfig,
643) -> Result<Vec<GoEnrichmentResult>> {
644 if config.min_genes > config.max_genes {
645 return Err(CyaneaError::InvalidInput(format!(
646 "go_enrichment: min_genes ({}) > max_genes ({})",
647 config.min_genes, config.max_genes,
648 )));
649 }
650
651 let filtered: Vec<&GoTerm> = annotation
653 .terms
654 .iter()
655 .filter(|t| match config.namespace {
656 Some(ns) => t.namespace == ns,
657 None => true,
658 })
659 .filter(|t| {
660 let unique: HashSet<usize> = t.genes.iter().copied().collect();
661 let n = unique.len();
662 n >= config.min_genes && n <= config.max_genes
663 })
664 .collect();
665
666 if filtered.is_empty() {
667 return Err(CyaneaError::InvalidInput(
668 "go_enrichment: no GO terms pass size/namespace filters".into(),
669 ));
670 }
671
672 let gene_sets: Vec<GeneSet> = filtered
674 .iter()
675 .map(|t| GeneSet {
676 name: t.id.clone(),
677 genes: t.genes.clone(),
678 })
679 .collect();
680
681 let ora_results = ora(significant, &gene_sets, n_total)?;
683
684 let term_map: BTreeMap<&str, &GoTerm> = filtered.iter().map(|t| (t.id.as_str(), *t)).collect();
686
687 let results = ora_results
688 .into_iter()
689 .map(|r| {
690 let term = term_map[r.gene_set.as_str()];
691 GoEnrichmentResult {
692 term_id: r.gene_set,
693 term_name: term.name.clone(),
694 namespace: term.namespace,
695 overlap: r.overlap,
696 expected: r.expected,
697 gene_set_size: r.gene_set_size,
698 p_value: r.p_value,
699 p_adjusted: r.p_adjusted,
700 }
701 })
702 .collect();
703
704 Ok(results)
705}
706
707#[cfg(test)]
710mod tests {
711 use super::*;
712
713 #[test]
716 fn ora_basic_enrichment() {
717 let significant = vec![0, 1, 2, 3, 4];
719 let gene_sets = vec![
720 GeneSet { name: "enriched".into(), genes: vec![0, 1, 2, 50, 51] },
721 ];
722 let results = ora(&significant, &gene_sets, 100).unwrap();
723 assert_eq!(results.len(), 1);
724 assert_eq!(results[0].overlap, 3);
725 assert_eq!(results[0].gene_set_size, 5);
726 assert!(results[0].p_value < 0.01, "p={}", results[0].p_value);
728 }
729
730 #[test]
731 fn ora_no_overlap() {
732 let significant = vec![0, 1, 2];
733 let gene_sets = vec![
734 GeneSet { name: "disjoint".into(), genes: vec![50, 51, 52] },
735 ];
736 let results = ora(&significant, &gene_sets, 100).unwrap();
737 assert_eq!(results[0].overlap, 0);
738 assert!((results[0].p_value - 1.0).abs() < 1e-10, "p={}", results[0].p_value);
739 }
740
741 #[test]
742 fn ora_complete_overlap() {
743 let significant = vec![0, 1, 2, 3, 4];
745 let gene_sets = vec![
746 GeneSet { name: "complete".into(), genes: vec![0, 1, 2, 3, 4] },
747 ];
748 let results = ora(&significant, &gene_sets, 1000).unwrap();
749 assert_eq!(results[0].overlap, 5);
750 assert!(results[0].p_value < 0.001, "p={}", results[0].p_value);
751 }
752
753 #[test]
754 fn ora_empty_significant() {
755 let significant: Vec<usize> = vec![];
756 let gene_sets = vec![
757 GeneSet { name: "any".into(), genes: vec![0, 1, 2] },
758 ];
759 let results = ora(&significant, &gene_sets, 100).unwrap();
760 assert_eq!(results[0].overlap, 0);
761 assert!((results[0].p_value - 1.0).abs() < 1e-10);
762 }
763
764 #[test]
765 fn ora_expected_overlap() {
766 let significant: Vec<usize> = (0..10).collect();
768 let gene_sets = vec![
769 GeneSet { name: "gs".into(), genes: (0..20).collect() },
770 ];
771 let results = ora(&significant, &gene_sets, 100).unwrap();
772 assert!((results[0].expected - 2.0).abs() < 1e-10, "expected={}", results[0].expected);
774 }
775
776 #[test]
777 fn ora_bh_correction_applied() {
778 let significant: Vec<usize> = (0..5).collect();
780 let gene_sets = vec![
781 GeneSet { name: "gs1".into(), genes: vec![0, 1, 2, 50, 51] },
782 GeneSet { name: "gs2".into(), genes: vec![60, 61, 62, 63, 64] },
783 GeneSet { name: "gs3".into(), genes: vec![0, 1, 70, 71, 72] },
784 ];
785 let results = ora(&significant, &gene_sets, 100).unwrap();
786 for r in &results {
787 assert!(
788 r.p_adjusted >= r.p_value - 1e-15,
789 "{}: padj={} < p={}",
790 r.gene_set, r.p_adjusted, r.p_value,
791 );
792 }
793 }
794
795 #[test]
796 fn ora_sorted_by_pvalue() {
797 let significant: Vec<usize> = (0..5).collect();
798 let gene_sets = vec![
799 GeneSet { name: "gs1".into(), genes: vec![0, 1, 2, 50, 51] },
800 GeneSet { name: "gs2".into(), genes: vec![60, 61, 62, 63, 64] },
801 GeneSet { name: "gs3".into(), genes: vec![0, 1, 2, 3, 4] },
802 ];
803 let results = ora(&significant, &gene_sets, 100).unwrap();
804 for w in results.windows(2) {
805 assert!(w[0].p_value <= w[1].p_value + 1e-15);
806 }
807 }
808
809 #[test]
810 fn ora_error_index_too_large() {
811 let significant = vec![100]; let gene_sets = vec![
813 GeneSet { name: "gs".into(), genes: vec![0] },
814 ];
815 assert!(ora(&significant, &gene_sets, 100).is_err());
816 }
817
818 #[test]
819 fn ora_error_gene_set_index_too_large() {
820 let significant = vec![0];
821 let gene_sets = vec![
822 GeneSet { name: "gs".into(), genes: vec![200] },
823 ];
824 assert!(ora(&significant, &gene_sets, 100).is_err());
825 }
826
827 #[test]
828 fn ora_error_empty_gene_sets() {
829 assert!(ora(&[0], &[], 100).is_err());
830 }
831
832 #[test]
833 fn ora_error_zero_universe() {
834 let gene_sets = vec![GeneSet { name: "gs".into(), genes: vec![] }];
835 assert!(ora(&[], &gene_sets, 0).is_err());
836 }
837
838 #[test]
839 fn ora_duplicate_genes_deduplicated() {
840 let significant = vec![0, 0, 1, 1, 2];
842 let gene_sets = vec![
843 GeneSet { name: "gs".into(), genes: vec![0, 0, 1, 50, 50] },
844 ];
845 let results = ora(&significant, &gene_sets, 100).unwrap();
846 assert_eq!(results[0].overlap, 2); assert_eq!(results[0].gene_set_size, 3); }
850
851 #[test]
854 fn gsea_genes_at_top() {
855 let genes: Vec<usize> = (0..50).collect();
857 let scores: Vec<f64> = (0..50).rev().map(|i| i as f64).collect();
858 let gene_sets = vec![
859 GeneSet { name: "top".into(), genes: vec![0, 1, 2, 3, 4] },
860 ];
861 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 200).unwrap();
862 assert!(results[0].enrichment_score > 0.0, "ES={}", results[0].enrichment_score);
863 }
864
865 #[test]
866 fn gsea_genes_at_bottom() {
867 let genes: Vec<usize> = (0..50).collect();
869 let scores: Vec<f64> = (0..50).rev().map(|i| i as f64).collect();
870 let gene_sets = vec![
871 GeneSet { name: "bottom".into(), genes: vec![45, 46, 47, 48, 49] },
872 ];
873 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 200).unwrap();
874 assert!(results[0].enrichment_score < 0.0, "ES={}", results[0].enrichment_score);
875 }
876
877 #[test]
878 fn gsea_uniform_distribution() {
879 let genes: Vec<usize> = (0..100).collect();
881 let scores: Vec<f64> = (0..100).rev().map(|i| i as f64).collect();
882 let gene_sets = vec![
883 GeneSet { name: "uniform".into(), genes: vec![0, 20, 40, 60, 80] },
884 ];
885 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 200).unwrap();
886 assert!(results[0].enrichment_score.abs() < 0.5, "ES={}", results[0].enrichment_score);
888 }
889
890 #[test]
891 fn gsea_unweighted() {
892 let genes: Vec<usize> = (0..20).collect();
894 let scores: Vec<f64> = (0..20).rev().map(|i| i as f64).collect();
895 let gene_sets = vec![
896 GeneSet { name: "top".into(), genes: vec![0, 1, 2, 3, 4] },
897 ];
898 let results = gsea_preranked(&genes, &scores, &gene_sets, 0.0, 100).unwrap();
899 assert!(results[0].enrichment_score > 0.0);
900 }
901
902 #[test]
903 fn gsea_weight_one_classic() {
904 let genes: Vec<usize> = (0..20).collect();
906 let scores: Vec<f64> = (0..20).rev().map(|i| i as f64 + 1.0).collect();
907 let gene_sets = vec![
908 GeneSet { name: "top".into(), genes: vec![0, 1, 2, 3, 4] },
909 ];
910 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
911 assert!(results[0].enrichment_score > 0.0);
912 }
913
914 #[test]
915 fn gsea_nes_sign_matches_es() {
916 let genes: Vec<usize> = (0..50).collect();
917 let scores: Vec<f64> = (0..50).rev().map(|i| i as f64).collect();
918 let gene_sets = vec![
919 GeneSet { name: "top".into(), genes: vec![0, 1, 2, 3, 4] },
920 GeneSet { name: "bottom".into(), genes: vec![45, 46, 47, 48, 49] },
921 ];
922 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 200).unwrap();
923 for r in &results {
924 if r.enrichment_score > 0.0 {
925 assert!(r.normalized_es >= 0.0, "NES={} but ES={}", r.normalized_es, r.enrichment_score);
926 } else if r.enrichment_score < 0.0 {
927 assert!(r.normalized_es <= 0.0, "NES={} but ES={}", r.normalized_es, r.enrichment_score);
928 }
929 }
930 }
931
932 #[test]
933 fn gsea_leading_edge_size() {
934 let genes: Vec<usize> = (0..20).collect();
935 let scores: Vec<f64> = (0..20).rev().map(|i| i as f64).collect();
936 let gene_sets = vec![
937 GeneSet { name: "top".into(), genes: vec![0, 1, 2, 3, 4] },
938 ];
939 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
940 assert!(results[0].leading_edge_size >= 1);
942 assert!(results[0].leading_edge_size <= results[0].gene_set_size);
943 }
944
945 #[test]
946 fn gsea_pvalue_in_range() {
947 let genes: Vec<usize> = (0..30).collect();
948 let scores: Vec<f64> = (0..30).rev().map(|i| i as f64).collect();
949 let gene_sets = vec![
950 GeneSet { name: "gs".into(), genes: vec![0, 1, 2, 15, 16] },
951 ];
952 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
953 assert!(results[0].p_value >= 0.0 && results[0].p_value <= 1.0,
954 "p={}", results[0].p_value);
955 assert!(results[0].p_adjusted >= 0.0 && results[0].p_adjusted <= 1.0,
956 "padj={}", results[0].p_adjusted);
957 }
958
959 #[test]
960 fn gsea_deterministic_with_fixed_seed() {
961 let genes: Vec<usize> = (0..30).collect();
962 let scores: Vec<f64> = (0..30).rev().map(|i| i as f64).collect();
963 let gene_sets = vec![
964 GeneSet { name: "gs".into(), genes: vec![0, 1, 2, 3, 4] },
965 ];
966 let r1 = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
967 let r2 = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
968 assert!((r1[0].enrichment_score - r2[0].enrichment_score).abs() < 1e-15);
969 assert!((r1[0].p_value - r2[0].p_value).abs() < 1e-15);
970 }
971
972 #[test]
973 fn gsea_no_overlap_with_list() {
974 let genes: Vec<usize> = (0..10).collect();
976 let scores: Vec<f64> = (0..10).rev().map(|i| i as f64).collect();
977 let gene_sets = vec![
978 GeneSet { name: "outside".into(), genes: vec![100, 101, 102] },
979 ];
980 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
981 assert_eq!(results[0].gene_set_size, 0);
982 assert!((results[0].enrichment_score - 0.0).abs() < 1e-15);
983 assert!((results[0].p_value - 1.0).abs() < 1e-15);
984 }
985
986 #[test]
987 fn gsea_single_gene_set_single_gene() {
988 let genes: Vec<usize> = (0..10).collect();
989 let scores: Vec<f64> = (0..10).rev().map(|i| i as f64 + 1.0).collect();
990 let gene_sets = vec![
991 GeneSet { name: "single".into(), genes: vec![0] },
992 ];
993 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 100).unwrap();
994 assert_eq!(results[0].gene_set_size, 1);
995 assert!(results[0].enrichment_score > 0.0);
996 }
997
998 #[test]
999 fn gsea_error_empty_genes() {
1000 let gene_sets = vec![GeneSet { name: "gs".into(), genes: vec![0] }];
1001 assert!(gsea_preranked(&[], &[], &gene_sets, 1.0, 100).is_err());
1002 }
1003
1004 #[test]
1005 fn gsea_error_mismatched_lengths() {
1006 let gene_sets = vec![GeneSet { name: "gs".into(), genes: vec![0] }];
1007 assert!(gsea_preranked(&[0, 1], &[1.0], &gene_sets, 1.0, 100).is_err());
1008 }
1009
1010 #[test]
1011 fn gsea_error_empty_gene_sets() {
1012 assert!(gsea_preranked(&[0], &[1.0], &[], 1.0, 100).is_err());
1013 }
1014
1015 #[test]
1016 fn gsea_error_zero_permutations() {
1017 let gene_sets = vec![GeneSet { name: "gs".into(), genes: vec![0] }];
1018 assert!(gsea_preranked(&[0], &[1.0], &gene_sets, 1.0, 0).is_err());
1019 }
1020
1021 #[test]
1022 fn gsea_multiple_sets_bh_correction() {
1023 let genes: Vec<usize> = (0..50).collect();
1024 let scores: Vec<f64> = (0..50).rev().map(|i| i as f64).collect();
1025 let gene_sets = vec![
1026 GeneSet { name: "gs1".into(), genes: vec![0, 1, 2, 3, 4] },
1027 GeneSet { name: "gs2".into(), genes: vec![25, 26, 27, 28, 29] },
1028 GeneSet { name: "gs3".into(), genes: vec![45, 46, 47, 48, 49] },
1029 ];
1030 let results = gsea_preranked(&genes, &scores, &gene_sets, 1.0, 200).unwrap();
1031 for r in &results {
1033 assert!(
1034 r.p_adjusted >= r.p_value - 1e-15,
1035 "{}: padj={} < p={}",
1036 r.gene_set, r.p_adjusted, r.p_value,
1037 );
1038 }
1039 for w in results.windows(2) {
1041 assert!(w[0].p_value <= w[1].p_value + 1e-15);
1042 }
1043 }
1044
1045 #[test]
1048 fn xorshift_deterministic() {
1049 let mut rng1 = Xorshift64::new(42);
1050 let mut rng2 = Xorshift64::new(42);
1051 for _ in 0..100 {
1052 assert_eq!(rng1.next_u64(), rng2.next_u64());
1053 }
1054 }
1055
1056 #[test]
1057 fn xorshift_different_seeds() {
1058 let mut rng1 = Xorshift64::new(1);
1059 let mut rng2 = Xorshift64::new(2);
1060 let differs = (0..10).any(|_| rng1.next_u64() != rng2.next_u64());
1062 assert!(differs);
1063 }
1064
1065 fn make_go_terms() -> Vec<GoTerm> {
1068 vec![
1069 GoTerm {
1070 id: "GO:0000001".into(),
1071 name: "mito inheritance".into(),
1072 namespace: GoNamespace::BiologicalProcess,
1073 genes: vec![0, 1, 2, 3, 4, 50, 51, 52, 53, 54],
1074 },
1075 GoTerm {
1076 id: "GO:0000002".into(),
1077 name: "mito genome maintenance".into(),
1078 namespace: GoNamespace::BiologicalProcess,
1079 genes: vec![60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
1080 },
1081 GoTerm {
1082 id: "GO:0000003".into(),
1083 name: "ATP binding".into(),
1084 namespace: GoNamespace::MolecularFunction,
1085 genes: vec![0, 1, 10, 11, 12, 13, 14, 15, 16, 17],
1086 },
1087 ]
1088 }
1089
1090 #[test]
1091 fn go_annotation_new_empty_error() {
1092 assert!(GoAnnotation::new(vec![]).is_err());
1093 }
1094
1095 #[test]
1096 fn go_annotation_from_entries_merges() {
1097 let entries = vec![
1098 (0usize, "GO:0000001", "term A", GoNamespace::BiologicalProcess),
1099 (1, "GO:0000001", "term A", GoNamespace::BiologicalProcess),
1100 (2, "GO:0000002", "term B", GoNamespace::MolecularFunction),
1101 ];
1102 let ann = GoAnnotation::from_entries(&entries).unwrap();
1103 assert_eq!(ann.n_terms(), 2);
1104 let t = ann.terms.iter().find(|t| t.id == "GO:0000001").unwrap();
1106 assert_eq!(t.genes, vec![0, 1]);
1107 }
1108
1109 #[test]
1110 fn go_annotation_from_entries_empty_error() {
1111 let entries: Vec<(usize, &str, &str, GoNamespace)> = vec![];
1112 assert!(GoAnnotation::from_entries(&entries).is_err());
1113 }
1114
1115 #[test]
1116 fn go_annotation_n_terms_and_n_genes() {
1117 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1118 assert_eq!(ann.n_terms(), 3);
1119 let expected_genes: HashSet<usize> = make_go_terms()
1121 .iter()
1122 .flat_map(|t| t.genes.iter().copied())
1123 .collect();
1124 assert_eq!(ann.n_genes(), expected_genes.len());
1125 }
1126
1127 #[test]
1128 fn go_annotation_terms_for_gene() {
1129 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1130 let mut ids = ann.terms_for_gene(0);
1132 ids.sort();
1133 assert_eq!(ids, vec!["GO:0000001", "GO:0000003"]);
1134 assert_eq!(ann.terms_for_gene(60), vec!["GO:0000002"]);
1136 assert!(ann.terms_for_gene(99).is_empty());
1138 }
1139
1140 #[test]
1141 fn go_annotation_filter_namespace() {
1142 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1143 let bp = ann.filter_namespace(GoNamespace::BiologicalProcess).unwrap();
1144 assert_eq!(bp.n_terms(), 2);
1145 for t in &bp.terms {
1146 assert_eq!(t.namespace, GoNamespace::BiologicalProcess);
1147 }
1148 }
1149
1150 #[test]
1151 fn go_annotation_filter_namespace_empty_error() {
1152 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1153 assert!(ann.filter_namespace(GoNamespace::CellularComponent).is_err());
1154 }
1155
1156 #[test]
1157 fn go_enrichment_basic() {
1158 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1159 let significant = vec![0, 1, 2, 3, 4]; let config = GoEnrichmentConfig {
1161 min_genes: 1,
1162 ..Default::default()
1163 };
1164 let results = go_enrichment(&significant, &ann, 100, &config).unwrap();
1165 let r = results.iter().find(|r| r.term_id == "GO:0000001").unwrap();
1167 assert_eq!(r.overlap, 5);
1168 assert_eq!(r.term_name, "mito inheritance");
1169 assert_eq!(r.namespace, GoNamespace::BiologicalProcess);
1170 assert!(r.p_value < 0.01, "p={}", r.p_value);
1171 }
1172
1173 #[test]
1174 fn go_enrichment_size_filter_min() {
1175 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1177 let config = GoEnrichmentConfig {
1178 min_genes: 11,
1179 max_genes: 500,
1180 namespace: None,
1181 };
1182 assert!(go_enrichment(&[0], &ann, 100, &config).is_err());
1183 }
1184
1185 #[test]
1186 fn go_enrichment_size_filter_max() {
1187 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1189 let config = GoEnrichmentConfig {
1190 min_genes: 1,
1191 max_genes: 5,
1192 namespace: None,
1193 };
1194 assert!(go_enrichment(&[0], &ann, 100, &config).is_err());
1195 }
1196
1197 #[test]
1198 fn go_enrichment_namespace_filter() {
1199 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1200 let config = GoEnrichmentConfig {
1201 min_genes: 1,
1202 max_genes: 500,
1203 namespace: Some(GoNamespace::MolecularFunction),
1204 };
1205 let results = go_enrichment(&[0, 1], &ann, 100, &config).unwrap();
1206 assert_eq!(results.len(), 1);
1207 assert_eq!(results[0].term_id, "GO:0000003");
1208 }
1209
1210 #[test]
1211 fn go_enrichment_no_terms_pass_error() {
1212 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1213 let config = GoEnrichmentConfig {
1214 min_genes: 1,
1215 max_genes: 500,
1216 namespace: Some(GoNamespace::CellularComponent),
1217 };
1218 assert!(go_enrichment(&[0], &ann, 100, &config).is_err());
1219 }
1220
1221 #[test]
1222 fn go_enrichment_min_gt_max_error() {
1223 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1224 let config = GoEnrichmentConfig {
1225 min_genes: 100,
1226 max_genes: 5,
1227 namespace: None,
1228 };
1229 assert!(go_enrichment(&[0], &ann, 100, &config).is_err());
1230 }
1231
1232 #[test]
1233 fn go_enrichment_sorted_by_pvalue() {
1234 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1235 let significant = vec![0, 1, 2, 3, 4];
1236 let config = GoEnrichmentConfig {
1237 min_genes: 1,
1238 ..Default::default()
1239 };
1240 let results = go_enrichment(&significant, &ann, 100, &config).unwrap();
1241 for w in results.windows(2) {
1242 assert!(
1243 w[0].p_value <= w[1].p_value + 1e-15,
1244 "not sorted: {} > {}",
1245 w[0].p_value,
1246 w[1].p_value,
1247 );
1248 }
1249 }
1250
1251 #[test]
1252 fn go_enrichment_bh_correction() {
1253 let ann = GoAnnotation::new(make_go_terms()).unwrap();
1254 let significant = vec![0, 1, 2, 3, 4];
1255 let config = GoEnrichmentConfig {
1256 min_genes: 1,
1257 ..Default::default()
1258 };
1259 let results = go_enrichment(&significant, &ann, 100, &config).unwrap();
1260 for r in &results {
1261 assert!(
1262 r.p_adjusted >= r.p_value - 1e-15,
1263 "{}: padj={} < p={}",
1264 r.term_id,
1265 r.p_adjusted,
1266 r.p_value,
1267 );
1268 }
1269 }
1270}