ggca/lib.rs
1//! # Gene GEM Correlation Analysis (GGCA)
2//!
//! Efficiently computes the correlation (Pearson, Spearman or Kendall) and the two-sided p-value between all pairs from two datasets. It also supports [CpG Site IDs][cpg-site].
4//!
5//! ## Installation
6//!
7//! 1. Add crate to `Cargo.toml`: `ggca = "1.0.0"`
8//!
9//!
10//! ## Usage
11//!
12//! **Basic example**:
13//!
14//! ```ignore
15//! use ggca::adjustment::AdjustmentMethod;
16//! use ggca::analysis::Analysis;
17//! use ggca::correlation::CorrelationMethod;
18//!
//! // File paths
20//! let gene_file_path = "mrna.csv".to_string();
21//! let gem_file_path = "mirna.csv".to_string();
22//!
23//! // Some parameters
24//! let gem_contains_cpg = false;
25//! let is_all_vs_all = true;
//! let keep_top_n = Some(10); // Keeps the top 10 correlations (sorted by absolute value)
27//! let collect_gem_dataset = None; // Better performance. Keep small GEM files in memory
28//!
//! // Creates and runs an analysis
30//! let analysis = Analysis {
31//! gene_file_path,
32//! gem_file_path,
//! gem_contains_cpg,
34//! correlation_method: CorrelationMethod::Pearson,
35//! correlation_threshold: 0.7,
36//! sort_buf_size: 2_000_000,
37//! adjustment_method: AdjustmentMethod::BenjaminiHochberg,
38//! is_all_vs_all,
39//! collect_gem_dataset,
40//! keep_top_n,
41//! };
42//!
43//! let (result, _total_combinations_count, number_of_elements_evaluated) = analysis.compute().unwrap();
44//!
45//! println!(
46//! "Number of elements -> {} of {} combinations evaluated",
47//! result.len(),
48//! number_of_elements_evaluated
49//! );
50//!
51//! for cor_p_value in result.iter() {
52//! println!("{}", cor_p_value);
53//! }
54//! ```
55//!
56//! **With CpG Site IDs**:
57//!
58//! ```ignore
59//! use ggca::adjustment::AdjustmentMethod;
60//! use ggca::analysis::Analysis;
61//! use ggca::correlation::CorrelationMethod;
62//!
//! // Datasets' paths
64//! let gene_file_path = "mrna.csv".to_string();
65//! let gem_file_path = "methylation_with_cpgs.csv".to_string();
66//!
67//! // Some parameters
68//! let gem_contains_cpg = true; // Second column in df2 contains CpG Site IDs
69//! let is_all_vs_all = false; // Only matching genes
//! let keep_top_n = Some(10); // Keeps the top 10 correlations (sorted by absolute value)
71//! let collect_gem_dataset = None;
72//!
73//! let analysis = Analysis {
74//! gene_file_path,
75//! gem_file_path,
76//! gem_contains_cpg,
77//! correlation_method: CorrelationMethod::Pearson,
78//! correlation_threshold: 0.8,
79//! sort_buf_size: 2_000_000,
80//! adjustment_method: AdjustmentMethod::Bonferroni,
81//! is_all_vs_all,
82//! collect_gem_dataset,
83//! keep_top_n,
84//!
85//! };
86//!
87//! let (result, _total_combinations_count, number_of_elements_evaluated) = analysis.compute().unwrap();
88//!
89//! println!(
90//! "Number of elements -> {} of {} combinations evaluated",
91//! result.len(),
92//! number_of_elements_evaluated
93//! );
94//!
95//! for cor_p_value in result.iter() {
96//! println!("{}", cor_p_value);
97//! }
98//! ```
99//!
100//!
101//! ## More examples
102//!
103//! You can check the [examples][examples-folder] folder for more types of analysis!
104//!
105//!
106//! [cpg-site]: https://en.wikipedia.org/wiki/CpG_site
107//! [examples-folder]: https://github.com/jware-solutions/ggca/tree/master/examples
108
109pub mod adjustment;
110pub mod analysis;
111pub mod correlation;
112pub mod dataset;
113pub mod types;
114
115use adjustment::AdjustmentMethod;
116use analysis::{Analysis, GGCADiffSamples, GGCADiffSamplesLength};
117use correlation::{CorResult, CorrelationMethod};
118use dataset::GGCAError;
119use pyo3::wrap_pyfunction;
120use pyo3::{create_exception, prelude::*};
121use types::VecOfResults;
122
// Errors
// Python exception types declared for the `ggca` module. Both use
// `pyo3::exceptions::PyException` as their base class and are exported under the
// same names by the `ggca` module initialiser further down in this file.
// NOTE(review): neither type is raised in this file; presumably they are raised
// where a correlation/adjustment method value is parsed — confirm against the
// `correlation` and `adjustment` modules.
create_exception!(
    ggca,
    InvalidCorrelationMethod,
    pyo3::exceptions::PyException
);
create_exception!(ggca, InvalidAdjustmentMethod, pyo3::exceptions::PyException);
130
131// NOTE: Python has named arguments, so this linting warning can be disabled without sacrificing maintainability
132#[pyfunction]
133#[pyo3(signature = (gene_file_path, gem_file_path, correlation_method, correlation_threshold, sort_buf_size, adjustment_method, is_all_vs_all, gem_contains_cpg, collect_gem_dataset=None, keep_top_n=None))]
134#[allow(clippy::too_many_arguments)]
135fn correlate(
136 py: Python,
137 gene_file_path: String,
138 gem_file_path: String,
139 correlation_method: CorrelationMethod,
140 correlation_threshold: f64,
141 sort_buf_size: usize,
142 adjustment_method: AdjustmentMethod,
143 is_all_vs_all: bool,
144 gem_contains_cpg: bool,
145 collect_gem_dataset: Option<bool>,
146 keep_top_n: Option<usize>,
147) -> PyResult<(VecOfResults, usize, usize)> {
148 py.allow_threads(|| {
149 // Creates analysis and run
150 let analysis = Analysis {
151 gene_file_path,
152 gem_file_path,
153 gem_contains_cpg,
154 correlation_method,
155 correlation_threshold,
156 sort_buf_size,
157 adjustment_method,
158 is_all_vs_all,
159 collect_gem_dataset,
160 keep_top_n,
161 };
162
163 analysis.compute()
164 })
165}
166
167/// A Python module implemented in Rust.
168#[pymodule]
169fn ggca(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
170 // Functions
171 m.add_function(wrap_pyfunction!(correlate, m)?)?;
172
173 // Enums
174 m.add_class::<CorrelationMethod>()?;
175 m.add_class::<AdjustmentMethod>()?;
176
177 // Structs
178 m.add_class::<CorResult>()?;
179
180 // Errors
181 m.add("GGCAError", py.get_type_bound::<GGCAError>())?;
182 m.add(
183 "GGCADiffSamplesLength",
184 py.get_type_bound::<GGCADiffSamplesLength>(),
185 )?;
186 m.add("GGCADiffSamples", py.get_type_bound::<GGCADiffSamples>())?;
187 m.add(
188 "InvalidCorrelationMethod",
189 py.get_type_bound::<InvalidCorrelationMethod>(),
190 )?;
191 m.add(
192 "InvalidAdjustmentMethod",
193 py.get_type_bound::<InvalidAdjustmentMethod>(),
194 )?;
195 Ok(())
196}