sciforge-hub 0.0.4

Central hub orchestrating Sciforge subsystems (api, engine, tools).
Documentation
//! Dispatch handler for genomics functions.

use super::super::params::*;
use crate::domain::biology as bio;
use crate::domain::common::errors::{HubError, HubResult};
use crate::engine::experience::runner::RunOutput;

/// Routes a genomics function name to its implementation in `bio::genomics`.
///
/// `func` is matched against the names listed below; each arm reads the
/// parameters it needs from `p` through the `get_*` helpers and wraps the
/// result in the [`RunOutput`] variant that fits the callee's return value.
///
/// Sequence-like parameters are read as strings and, where the callee takes
/// raw bytes, passed on via `as_bytes()`. Collections that have no dedicated
/// `RunOutput` variant (ORF lists, codon usage, k-mer counts, ...) are
/// flattened into comma-separated `Text`.
///
/// # Errors
///
/// * Any error produced by a `get_*` helper is propagated unchanged.
/// * `HubError::InvalidInput` when `func` is not a known function name.
/// * `HubError::InvalidInput` when `hardy_weinberg_chi_squared` receives an
///   `observed` or `expected` vector with fewer than three entries (this used
///   to panic on the out-of-bounds index).
pub(super) fn dispatch(func: &str, p: &Params) -> HubResult<RunOutput> {
    match func {
        // ---- annotation ----
        "gene_density" => Ok(RunOutput::Scalar(bio::genomics::annotation::gene_density(
            get_u(p, "genes")?,
            get_f(p, "region_length_mb")?,
        ))),
        "cpg_enrichment" => Ok(RunOutput::Scalar(
            bio::genomics::annotation::cpg_enrichment(
                get_u(p, "cpg_count")?,
                get_u(p, "c_count")?,
                get_u(p, "g_count")?,
                get_u(p, "length")?,
            ),
        )),
        "codon_adaptation_index" => Ok(RunOutput::Scalar(
            bio::genomics::annotation::codon_adaptation_index(get_v(p, "codon_weights")?),
        )),
        "enc_wright" => Ok(RunOutput::Scalar(bio::genomics::annotation::enc_wright(
            get_v(p, "codon_family_homozygosities")?,
        ))),
        "repeat_density" => Ok(RunOutput::Scalar(
            bio::genomics::annotation::repeat_density(
                get_u(p, "repeat_bases")?,
                get_u(p, "total_bases")?,
            ),
        )),
        "synteny_score" => Ok(RunOutput::Scalar(bio::genomics::annotation::synteny_score(
            get_u(p, "conserved_blocks")?,
            get_u(p, "total_genes")?,
        ))),
        "genome_completeness_busco" => Ok(RunOutput::Scalar(
            bio::genomics::annotation::genome_completeness_busco(
                get_u(p, "complete")?,
                get_u(p, "fragmented")?,
                get_u(p, "total_buscos")?,
            ),
        )),
        "ka_ks_ratio" => Ok(RunOutput::Scalar(bio::genomics::annotation::ka_ks_ratio(
            get_f(p, "nonsynonymous_subs")?,
            get_f(p, "synonymous_subs")?,
            get_f(p, "nonsynonymous_sites")?,
            get_f(p, "synonymous_sites")?,
        ))),
        "gc_isochore" => Ok(RunOutput::Text(
            bio::genomics::annotation::gc_isochore(get_f(p, "gc_content")?).to_string(),
        )),

        // ---- motifs ----
        "pwm_score" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Scalar(bio::genomics::motifs::pwm_score(
                get_m(p, "pwm")?,
                sequence,
            )))
        }
        "pwm_scan" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::PairVec(
                bio::genomics::motifs::pwm_scan(get_m(p, "pwm")?, sequence, get_f(p, "threshold")?)
                    .into_iter()
                    // The first tuple element is cast so both halves are f64 for `PairVec`.
                    .map(|(a, b)| (a as f64, b))
                    .collect(),
            ))
        }
        "information_content" => Ok(RunOutput::Vector(
            bio::genomics::motifs::information_content(get_m(p, "pwm")?),
        )),
        "total_information" => Ok(RunOutput::Scalar(bio::genomics::motifs::total_information(
            get_m(p, "pwm")?,
        ))),
        "consensus_sequence" => Ok(RunOutput::Text(bio::genomics::motifs::consensus_sequence(
            get_m(p, "pwm")?,
        ))),
        "frequency_matrix" => {
            // `sequences` is a single comma-separated string; each piece is one sequence.
            let sequences_strs = get_str(p, "sequences")?;
            let sequences_parts: Vec<&[u8]> =
                sequences_strs.split(',').map(|s| s.as_bytes()).collect();
            Ok(RunOutput::Matrix(bio::genomics::motifs::frequency_matrix(
                &sequences_parts,
                get_u(p, "length")?,
            )))
        }

        // ---- open reading frames / sequence utilities ----
        "find_orfs" => Ok(RunOutput::Text(
            bio::genomics::orf::find_orfs(get_str(p, "sequence")?, get_u(p, "min_length")?)
                .into_iter()
                .map(|(a, b, s)| format!("{a}:{b}:{s}"))
                .collect::<Vec<_>>()
                .join(","),
        )),
        "codon_usage" => Ok(RunOutput::Text(
            bio::genomics::orf::codon_usage(get_str(p, "sequence")?)
                .into_iter()
                .map(|(s, n)| format!("{s}:{n}"))
                .collect::<Vec<_>>()
                .join(","),
        )),
        "reading_frame_proteins" => Ok(RunOutput::Text(
            bio::genomics::orf::reading_frame_proteins(get_str(p, "sequence")?, get_u(p, "frame")?)
                .join(","),
        )),
        "genomics_gc_content" => Ok(RunOutput::Scalar(bio::genomics::orf::gc_content(get_str(
            p, "sequence",
        )?))),
        "gc3_content" => Ok(RunOutput::Scalar(bio::genomics::orf::gc3_content(get_str(
            p, "sequence",
        )?))),
        "longest_orf_length" => Ok(RunOutput::Integer(bio::genomics::orf::longest_orf_length(
            get_str(p, "sequence")?,
        ) as i64)),
        "nucleotide_frequency" => {
            let r = bio::genomics::orf::nucleotide_frequency(get_str(p, "sequence")?);
            Ok(RunOutput::Vector(r.to_vec()))
        }
        // `translate` is an alias of `genomics_translate`; both call the same routine.
        "genomics_translate" | "translate" => Ok(RunOutput::Text(
            bio::genomics::orf::translate(get_str(p, "sequence")?),
        )),
        "genomics_reverse_complement" => Ok(RunOutput::Text(
            bio::genomics::orf::reverse_complement(get_str(p, "sequence")?),
        )),
        "effective_number_of_codons" => {
            // `codons` is encoded as `CODON:COUNT;CODON:COUNT;...`. Entries without a
            // `:` or whose count does not parse are skipped, not reported.
            let pairs: Vec<(String, usize)> = get_str(p, "codons")?
                .split(';')
                .filter_map(|entry| {
                    let (codon, count) = entry.split_once(':')?;
                    Some((codon.to_string(), count.parse().ok()?))
                })
                .collect();
            Ok(RunOutput::Scalar(
                bio::genomics::orf::effective_number_of_codons(&pairs),
            ))
        }

        // ---- sequence statistics ----
        "kmer_count" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Text(
                bio::genomics::statistics::kmer_count(sequence, get_u(p, "k")?)
                    .into_iter()
                    .map(|(v, n)| format!("{}:{n}", String::from_utf8_lossy(&v)))
                    .collect::<Vec<_>>()
                    .join(","),
            ))
        }
        "gc_skew" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Vector(bio::genomics::statistics::gc_skew(
                sequence,
                get_u(p, "window")?,
            )))
        }
        "cpg_observed_expected" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Scalar(
                bio::genomics::statistics::cpg_observed_expected(sequence),
            ))
        }
        "linguistic_complexity" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Scalar(
                bio::genomics::statistics::linguistic_complexity(sequence),
            ))
        }
        "at_content" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Scalar(bio::genomics::statistics::at_content(
                sequence,
            )))
        }
        "dinucleotide_frequency" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Matrix(
                bio::genomics::statistics::dinucleotide_frequency(sequence)
                    .into_iter()
                    // Each (a, b, c) triple becomes one three-column matrix row.
                    .map(|(a, b, c)| vec![a as f64, b as f64, c])
                    .collect(),
            ))
        }
        "sequence_entropy" => {
            let sequence = get_str(p, "sequence")?.as_bytes();
            Ok(RunOutput::Scalar(
                bio::genomics::statistics::sequence_entropy(sequence),
            ))
        }
        "transition_transversion" => {
            let sequence_a = get_str(p, "sequence_a")?.as_bytes();
            let sequence_b = get_str(p, "sequence_b")?.as_bytes();
            Ok(RunOutput::Scalar(
                bio::genomics::statistics::transition_transversion(sequence_a, sequence_b),
            ))
        }

        // ---- variants ----
        "snp_allele_frequency" => Ok(RunOutput::Scalar(
            bio::genomics::variants::snp_allele_frequency(
                get_u(p, "alt_count")?,
                get_u(p, "total_alleles")?,
            ),
        )),
        "minor_allele_frequency" => Ok(RunOutput::Scalar(
            bio::genomics::variants::minor_allele_frequency(get_f(p, "allele_freq")?),
        )),
        "hardy_weinberg_expected" => {
            let (a, b, c) = bio::genomics::variants::hardy_weinberg_expected(get_f(p, "p")?);
            Ok(RunOutput::Triple(a, b, c))
        }
        "hardy_weinberg_chi_squared" => {
            // The callee takes fixed three-element arrays, so validate the lengths
            // up front: a short vector must become an error, not an index panic.
            // Entries beyond the third are ignored, as before.
            let observed_v = get_v(p, "observed")?;
            if observed_v.len() < 3 {
                return Err(HubError::InvalidInput(format!(
                    "hardy_weinberg_chi_squared: `observed` must contain at least 3 values, got {}",
                    observed_v.len()
                )));
            }
            let observed = [observed_v[0], observed_v[1], observed_v[2]];

            let expected_v = get_v(p, "expected")?;
            if expected_v.len() < 3 {
                return Err(HubError::InvalidInput(format!(
                    "hardy_weinberg_chi_squared: `expected` must contain at least 3 values, got {}",
                    expected_v.len()
                )));
            }
            let expected = [expected_v[0], expected_v[1], expected_v[2]];

            Ok(RunOutput::Scalar(
                bio::genomics::variants::hardy_weinberg_chi_squared(&observed, &expected),
            ))
        }
        "ti_tv_ratio" => Ok(RunOutput::Scalar(bio::genomics::variants::ti_tv_ratio(
            get_u(p, "transitions")?,
            get_u(p, "transversions")?,
        ))),
        "heterozygosity" => Ok(RunOutput::Scalar(bio::genomics::variants::heterozygosity(
            get_v(p, "allele_freqs")?,
        ))),
        "fst_weir_cockerham" => Ok(RunOutput::Scalar(
            bio::genomics::variants::fst_weir_cockerham(
                get_f(p, "het_within")?,
                get_f(p, "het_total")?,
            ),
        )),
        "linkage_disequilibrium" => Ok(RunOutput::Scalar(
            bio::genomics::variants::linkage_disequilibrium(
                get_f(p, "freq_ab")?,
                get_f(p, "freq_a")?,
                get_f(p, "freq_b")?,
            ),
        )),
        "r_squared_ld" => Ok(RunOutput::Scalar(bio::genomics::variants::r_squared_ld(
            get_f(p, "d")?,
            get_f(p, "freq_a")?,
            get_f(p, "freq_b")?,
        ))),
        "d_prime" => Ok(RunOutput::Scalar(bio::genomics::variants::d_prime(
            get_f(p, "d")?,
            get_f(p, "freq_a")?,
            get_f(p, "freq_b")?,
        ))),
        "indel_frameshift" => Ok(RunOutput::Boolean(
            bio::genomics::variants::indel_frameshift(get_i(p, "indel_length")?),
        )),
        "copy_number_variant_dosage" => Ok(RunOutput::Scalar(
            bio::genomics::variants::copy_number_variant_dosage(
                get_f(p, "reads_sample")?,
                get_f(p, "reads_reference")?,
                get_f(p, "ploidy")?,
            ),
        )),

        _ => Err(HubError::InvalidInput(format!("unknown function: {func}"))),
    }
}