ggca/
lib.rs

1//! # Gene GEM Correlation Analysis (GGCA)
2//!
//! Efficiently computes the correlation (Pearson, Spearman or Kendall) and the two-sided p-value between all pairs from two datasets. It also supports [CpG Site IDs][cpg-site].
4//!
5//! ## Installation
6//!
7//! 1. Add crate to `Cargo.toml`: `ggca = "1.0.0"`
8//!
9//!
10//! ## Usage
11//!
12//! **Basic example**:
13//!
14//! ```ignore
15//! use ggca::adjustment::AdjustmentMethod;
16//! use ggca::analysis::Analysis;
17//! use ggca::correlation::CorrelationMethod;
18//!
//! // File paths
20//! let gene_file_path = "mrna.csv".to_string();
21//! let gem_file_path = "mirna.csv".to_string();
22//!
23//! // Some parameters
24//! let gem_contains_cpg = false;
25//! let is_all_vs_all = true;
//! let keep_top_n = Some(10); // Keeps the top 10 correlations (sorted by absolute value)
27//! let collect_gem_dataset = None; // Better performance. Keep small GEM files in memory
28//!
//! // Creates and runs an analysis
30//! let analysis = Analysis {
31//!     gene_file_path,
32//!     gem_file_path,
//!     gem_contains_cpg,
34//!     correlation_method: CorrelationMethod::Pearson,
35//!     correlation_threshold: 0.7,
36//!     sort_buf_size: 2_000_000,
37//!     adjustment_method: AdjustmentMethod::BenjaminiHochberg,
38//!     is_all_vs_all,
39//!     collect_gem_dataset,
40//!     keep_top_n,
41//! };
42//!
43//! let (result, _total_combinations_count, number_of_elements_evaluated) = analysis.compute().unwrap();
44//!
45//! println!(
46//!     "Number of elements -> {} of {} combinations evaluated",
47//!     result.len(),
48//!     number_of_elements_evaluated
49//! );
50//!
51//! for cor_p_value in result.iter() {
52//!     println!("{}", cor_p_value);
53//! }
54//! ```
55//!
56//! **With CpG Site IDs**:
57//!
58//! ```ignore
59//! use ggca::adjustment::AdjustmentMethod;
60//! use ggca::analysis::Analysis;
61//! use ggca::correlation::CorrelationMethod;
62//!
//! // Dataset paths
64//! let gene_file_path = "mrna.csv".to_string();
65//! let gem_file_path = "methylation_with_cpgs.csv".to_string();
66//!
67//! // Some parameters
//! let gem_contains_cpg = true; // Second column of the GEM dataset contains CpG Site IDs
69//! let is_all_vs_all = false; // Only matching genes
//! let keep_top_n = Some(10); // Keeps the top 10 correlations (sorted by absolute value)
71//! let collect_gem_dataset = None;
72//!
73//! let analysis = Analysis {
74//!     gene_file_path,
75//!     gem_file_path,
76//!     gem_contains_cpg,
77//!     correlation_method: CorrelationMethod::Pearson,
78//!     correlation_threshold: 0.8,
79//!     sort_buf_size: 2_000_000,
80//!     adjustment_method: AdjustmentMethod::Bonferroni,
81//!     is_all_vs_all,
82//!     collect_gem_dataset,
83//!     keep_top_n,
84//!
85//! };
86//!
87//! let (result, _total_combinations_count, number_of_elements_evaluated) = analysis.compute().unwrap();
88//!
89//! println!(
90//!     "Number of elements -> {} of {} combinations evaluated",
91//!     result.len(),
92//!     number_of_elements_evaluated
93//! );
94//!
95//! for cor_p_value in result.iter() {
96//!     println!("{}", cor_p_value);
97//! }
98//! ```
99//!
100//!
101//! ## More examples
102//!
103//! You can check the [examples][examples-folder] folder for more types of analysis!
104//!
105//!
106//! [cpg-site]: https://en.wikipedia.org/wiki/CpG_site
107//! [examples-folder]: https://github.com/jware-solutions/ggca/tree/master/examples
108
109pub mod adjustment;
110pub mod analysis;
111pub mod correlation;
112pub mod dataset;
113pub mod types;
114
115use adjustment::AdjustmentMethod;
116use analysis::{Analysis, GGCADiffSamples, GGCADiffSamplesLength};
117use correlation::{CorResult, CorrelationMethod};
118use dataset::GGCAError;
119use pyo3::wrap_pyfunction;
120use pyo3::{create_exception, prelude::*};
121use types::VecOfResults;
122
123// Errors
124create_exception!(
125    ggca,
126    InvalidCorrelationMethod,
127    pyo3::exceptions::PyException
128);
129create_exception!(ggca, InvalidAdjustmentMethod, pyo3::exceptions::PyException);
130
131// NOTE: Python has named arguments, so this linting warning can be disabled without sacrificing maintainability
132#[pyfunction]
133#[pyo3(signature = (gene_file_path, gem_file_path, correlation_method, correlation_threshold, sort_buf_size, adjustment_method, is_all_vs_all, gem_contains_cpg, collect_gem_dataset=None, keep_top_n=None))]
134#[allow(clippy::too_many_arguments)]
135fn correlate(
136    py: Python,
137    gene_file_path: String,
138    gem_file_path: String,
139    correlation_method: CorrelationMethod,
140    correlation_threshold: f64,
141    sort_buf_size: usize,
142    adjustment_method: AdjustmentMethod,
143    is_all_vs_all: bool,
144    gem_contains_cpg: bool,
145    collect_gem_dataset: Option<bool>,
146    keep_top_n: Option<usize>,
147) -> PyResult<(VecOfResults, usize, usize)> {
148    py.allow_threads(|| {
149        // Creates analysis and run
150        let analysis = Analysis {
151            gene_file_path,
152            gem_file_path,
153            gem_contains_cpg,
154            correlation_method,
155            correlation_threshold,
156            sort_buf_size,
157            adjustment_method,
158            is_all_vs_all,
159            collect_gem_dataset,
160            keep_top_n,
161        };
162
163        analysis.compute()
164    })
165}
166
167/// A Python module implemented in Rust.
168#[pymodule]
169fn ggca(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
170    // Functions
171    m.add_function(wrap_pyfunction!(correlate, m)?)?;
172
173    // Enums
174    m.add_class::<CorrelationMethod>()?;
175    m.add_class::<AdjustmentMethod>()?;
176
177    // Structs
178    m.add_class::<CorResult>()?;
179
180    // Errors
181    m.add("GGCAError", py.get_type_bound::<GGCAError>())?;
182    m.add(
183        "GGCADiffSamplesLength",
184        py.get_type_bound::<GGCADiffSamplesLength>(),
185    )?;
186    m.add("GGCADiffSamples", py.get_type_bound::<GGCADiffSamples>())?;
187    m.add(
188        "InvalidCorrelationMethod",
189        py.get_type_bound::<InvalidCorrelationMethod>(),
190    )?;
191    m.add(
192        "InvalidAdjustmentMethod",
193        py.get_type_bound::<InvalidAdjustmentMethod>(),
194    )?;
195    Ok(())
196}