ggca 1.0.0

Efficiently computes the correlation (Pearson, Spearman or Kendall) and the two-sided p-value between all pairs from two datasets.
Documentation
from typing import Optional, Tuple, List
from enum import Enum


class CorrelationMethod(Enum):
    """Correlation methods that can be selected for an analysis.

    Each member maps to the integer identifier of the method:
    Spearman = 1, Kendall = 2 and Pearson = 3.
    """

    # NOTE: the body is indented with spaces only; mixing a tab-indented
    # docstring with space-indented members is an IndentationError.
    Spearman = 1
    Kendall = 2
    Pearson = 3


class AdjustmentMethod(Enum):
    """P-value adjustment methods that can be selected for an analysis.

    Each member maps to the integer identifier of the method:
    Benjamini-Hochberg = 1, Benjamini-Yekutieli = 2 and Bonferroni = 3.
    """

    # NOTE: the body is indented with spaces only; mixing a tab-indented
    # docstring with space-indented members is an IndentationError.
    BenjaminiHochberg = 1
    BenjaminiYekutieli = 2
    Bonferroni = 3


class CorResult:
    """A single correlation analysis result for one Gene/GEM pair."""

    # Gene name.
    gene: str
    # Gene Expression Modulator (GEM) name.
    gem: str
    # CpG Site ID, when one was specified; otherwise None.
    cpg_site_id: Optional[str]
    # Correlation statistic (Pearson, Spearman or Kendall, as selected).
    correlation: Optional[float]
    # P-value of the correlation.
    p_value: Optional[float]
    # Adjusted p-value (Benjamini-Hochberg, Benjamini-Yekutieli or Bonferroni, as selected).
    adjusted_p_value: Optional[float]

    def __init__(
        self,
        gene: str,
        gem: str,
        cpg_site_id: Optional[str],
        correlation: Optional[float],
        p_value: Optional[float],
        adjusted_p_value: Optional[float],
    ) -> None:
        """
        Represents a correlation analysis result. Includes Gene, GEM, CpG Site ID (if specified),
        correlation statistic, p-value and adjusted p-value.

        :param gene: Gene name
        :param gem: Gene Expression Modulator (GEM) name
        :param cpg_site_id: CpG Site ID
        :param correlation: Correlation statistic (Pearson, Spearman or Kendall, as selected)
        :param p_value: P-value
        :param adjusted_p_value: Adjusted p-value (Benjamini-Hochberg, Benjamini-Yekutieli or Bonferroni, as selected)
        """
        # The body uses the same space-based indentation as the ``def`` line;
        # a tab-indented body under a space-indented ``def`` raises TabError.
        self.gene = gene
        self.gem = gem
        self.cpg_site_id = cpg_site_id
        self.correlation = correlation
        self.p_value = p_value
        self.adjusted_p_value = adjusted_p_value


def correlate(
    gene_file_path: str,
    gem_file_path: str,
    correlation_method: CorrelationMethod,
    correlation_threshold: float,
    sort_buf_size: int,
    adjustment_method: AdjustmentMethod,
    is_all_vs_all: bool,
    gem_contains_cpg: bool,
    collect_gem_dataset: Optional[bool],
    keep_top_n: Optional[int],
) -> Tuple[List[CorResult], int, int]:
    """
    Correlates the rows of the mRNA (gene) file with the rows of the GEM file.

    :param gene_file_path: Path of the Gene file
    :param gem_file_path: Path of the Gene Expression Modulator (GEM) file
    :param correlation_method: Correlation method to compute
        (Spearman = 1, Kendall = 2 or Pearson = 3)
    :param correlation_threshold: Results whose correlation statistic is below
        this threshold are discarded
    :param sort_buf_size: Number of elements sorted per block on disk during the
        p-value adjustment process. Bigger blocks are faster but use more memory
    :param adjustment_method: P-value adjustment method
        (Benjamini-Hochberg = 1, Benjamini-Yekutieli = 2 or Bonferroni = 3)
    :param is_all_vs_all: True to evaluate every Gene against every GEM. Otherwise
        only matching Genes/GEM are evaluated (useful for CNA or Methylation analysis)
    :param gem_contains_cpg: Set to True when the GEM data has CpG Site IDs as its
        second column, so the GEM/CpG Site reference is preserved
    :param collect_gem_dataset: True to keep the GEM dataset in memory, which has a
        HUGE impact on analysis performance. Pass a boolean to force the behaviour,
        or None to allocate in memory automatically when the GEM dataset is small
        (<= 100MB)
    :param keep_top_n: Number of results to keep, or None to return every
        resulting combination
    :return: A tuple with a list of CorResult, the number of combinations before
        truncating by the 'keep_top_n' parameter and the number of combinations
        evaluated
    """
    ...


class GGCAError(Exception):
    """Raised when a general error occurs (a read error, a file that does not exist, among others)."""


class GGCADiffSamplesLength(Exception):
    """Raised when the two datasets have a different number of samples."""


class GGCADiffSamples(Exception):
    """Raised when the samples of the two datasets differ although their lengths match (maybe they are in a different order)."""

class InvalidCorrelationMethod(Exception):
    """Raised when an invalid correlation method is given. The only valid values are 1 (Spearman), 2 (Kendall) and 3 (Pearson)."""

class InvalidAdjustmentMethod(Exception):
    """Raised when an invalid adjustment method is given. The only valid values are 1 (Benjamini-Hochberg), 2 (Benjamini-Yekutieli) and 3 (Bonferroni)."""