stratiphy 0.1.5

Phenotype-driven identification of disease subgroups
Documentation
use std::collections::BTreeMap;

use ontolius::{TermId, TermIdParseError};

use crate::model::Cohort;

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ClusteringWorkflowResult {
    pub affinity_matrix: Vec<f64>,
    pub cluster_labels: BTreeMap<u16, Vec<u16>>,
    pub sort_order: Option<Vec<usize>>,
    pub gap_values: Option<GapValues>,
    pub split_check: Option<SplitCheck>,
    pub term_associations: BTreeMap<u16, Vec<TermAssociation>>,
}

impl TryFrom<crate::io::generated::stratiphy_workflow::ClusteringWorkflowResult>
    for ClusteringWorkflowResult
{
    type Error = String;

    fn try_from(
        value: crate::io::generated::stratiphy_workflow::ClusteringWorkflowResult,
    ) -> Result<Self, Self::Error> {
        let cluster_labels: BTreeMap<u16, Vec<u16>> = value
            .cluster_labels
            .into_iter()
            .map(|cl| {
                (
                    u16::try_from(cl.k).unwrap(),
                    cl.labels.into_iter().map(convert_cluster_id).collect(),
                )
            })
            .collect();

        let sort_order = if value.sort_order.is_empty() {
            None
        } else {
            Some(
                value
                    .sort_order
                    .into_iter()
                    .map(|v| usize::try_from(v).unwrap())
                    .collect(),
            )
        };

        let gap_values = if let Some(gv) = value.gap_values {
            Some(GapValues::try_from(gv)?)
        } else {
            None
        };

        let split_check = value.split_check.map(SplitCheck::from);

        let mut term_associations = BTreeMap::new();
        for ele in value.term_associations {
            let mut tas = Vec::with_capacity(ele.associations.len());
            for ass in ele.associations {
                tas.push(TermAssociation::try_from(ass)?);
            }
            term_associations.insert(convert_cluster_id(ele.k), tas);
        }

        Ok(ClusteringWorkflowResult {
            affinity_matrix: value.affinity_matrix,
            cluster_labels,
            sort_order,
            gap_values,
            split_check,
            term_associations,
        })
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct GapValues {
    pub log_wk_data: BTreeMap<u16, f64>,
    pub log_wk_rand: BTreeMap<u16, Vec<f64>>,
}

impl TryFrom<crate::io::generated::stratiphy_workflow::GapValues> for GapValues {
    type Error = String;

    fn try_from(
        value: crate::io::generated::stratiphy_workflow::GapValues,
    ) -> Result<Self, Self::Error> {
        let log_wk_data = value
            .log_wk_data
            .into_iter()
            .map(|(k, v)| {
                (
                    u16::try_from(k).expect("cluster id should never exceed u16 range"),
                    v,
                )
            })
            .collect();
        let log_wk_rand = value
            .log_wk_rand
            .into_iter()
            .map(|v| (convert_cluster_id(v.k), v.values))
            .collect();
        Ok(GapValues {
            log_wk_data,
            log_wk_rand,
        })
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SplitCheck {
    pub should_split: bool,
    pub split_proba: f64,
}

impl From<crate::io::generated::stratiphy_workflow::SplitCheck> for SplitCheck {
    fn from(value: crate::io::generated::stratiphy_workflow::SplitCheck) -> Self {
        SplitCheck {
            should_split: value.should_split,
            split_proba: value.split_proba,
        }
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum ObservationState {
    Unspecified,
    Present,
    Excluded,
}

impl From<crate::io::generated::stratiphy_workflow::ObservationState> for ObservationState {
    fn from(value: crate::io::generated::stratiphy_workflow::ObservationState) -> Self {
        match value {
            crate::io::generated::stratiphy_workflow::ObservationState::Unspecified => {
                ObservationState::Unspecified
            }
            crate::io::generated::stratiphy_workflow::ObservationState::Present => {
                ObservationState::Present
            }
            crate::io::generated::stratiphy_workflow::ObservationState::Excluded => {
                ObservationState::Excluded
            }
        }
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TermCount {
    observation_state: ObservationState,
    count: u64,
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum NominalTestKind {
    FisherExactTest,
    FisherExactMCTest { n_iter: u64 },
    ChiSquareTest { df: u32 },
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NominalTestResult {
    pub pval: f64,
    pub kind: Option<NominalTestKind>,
}

impl From<crate::io::generated::stratiphy_workflow::term_association::NominalTestResult>
    for NominalTestResult
{
    fn from(
        value: crate::io::generated::stratiphy_workflow::term_association::NominalTestResult,
    ) -> Self {
        Self {
            pval: value.pval,
            kind: match value.kind {
                Some(k) => {
                    match k {
                        crate::io::generated::stratiphy_workflow::term_association::nominal_test_result::Kind::Fet(_) => Some(NominalTestKind::FisherExactTest),
                        crate::io::generated::stratiphy_workflow::term_association::nominal_test_result::Kind::FetMc(fisher_exact_mc_test) => {
                            Some(NominalTestKind::FisherExactMCTest { n_iter: fisher_exact_mc_test.n_iter })
                        },
                        crate::io::generated::stratiphy_workflow::term_association::nominal_test_result::Kind::Chi2(chi_square_test) => {
                            Some(NominalTestKind::ChiSquareTest { df: chi_square_test.df })
                        }
                    }
                },
                None => {None},
            }
        }
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CorrectedTestResult {
    pub pval: f64,
}

impl From<crate::io::generated::stratiphy_workflow::term_association::CorrectedTestResult>
    for CorrectedTestResult
{
    fn from(
        value: crate::io::generated::stratiphy_workflow::term_association::CorrectedTestResult,
    ) -> Self {
        Self { pval: value.pval }
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TermAssociation {
    #[serde(
        serialize_with = "TermId::serialize_as_curie",
        deserialize_with = "TermId::deserialize_from_curie"
    )]
    pub term_id: TermId,
    pub counts: BTreeMap<u16, Vec<TermCount>>,
    pub nominal_test: Option<NominalTestResult>,
    pub corrected_test: Option<CorrectedTestResult>,
    pub sensitivity: Option<f64>,
    pub effect: Option<f64>,
}

impl TryFrom<crate::io::generated::stratiphy_workflow::TermAssociation> for TermAssociation {
    type Error = String;

    fn try_from(
        value: crate::io::generated::stratiphy_workflow::TermAssociation,
    ) -> Result<Self, Self::Error> {
        let term_id: TermId = value
            .term_id
            .parse::<TermId>()
            .map_err(|e: TermIdParseError| format!("{}: {}", value.term_id, e))?;
        let counts: BTreeMap<u16, Vec<TermCount>> = value
            .counts
            .into_iter()
            .map(|tc| {
                (
                    convert_cluster_id(tc.cluster_id),
                    convert_term_counts(tc.counts),
                )
            })
            .collect();
        Ok(TermAssociation {
            term_id,
            counts,
            nominal_test: value.nominal_test.map(|val| val.into()),
            corrected_test: value.corrected_test.map(|val| val.into()),
            sensitivity: value.sensitivity,
            effect: value.effect,
        })
    }
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ClusteringWorkflowMetadata {
    pub stratiphy_version: String,
    pub hpo_version: Option<String>,
}

impl From<crate::io::generated::stratiphy_workflow::ClusteringWorkflowMetadata>
    for ClusteringWorkflowMetadata
{
    fn from(value: crate::io::generated::stratiphy_workflow::ClusteringWorkflowMetadata) -> Self {
        ClusteringWorkflowMetadata {
            stratiphy_version: value.stratiphy_version,
            hpo_version: Some(value.hpo_version),
        }
    }
}

fn convert_cluster_id(val: u32) -> u16 {
    u16::try_from(val).expect("cluster id should never exceed the range of u16")
}

fn convert_term_counts(
    counts: Vec<crate::io::generated::stratiphy_workflow::term_association::term_counts::TermCount>,
) -> Vec<TermCount> {
    counts
        .into_iter()
        .map(|tc| TermCount {
            observation_state: tc.state().into(),
            count: tc.count as u64,
        })
        .collect()
}

#[derive(Debug, PartialEq, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct StratiphyResult {
    pub clustering_result: ClusteringWorkflowResult,
    pub cohort: Cohort,
    pub meta_data: ClusteringWorkflowMetadata,
}

impl TryFrom<crate::io::generated::stratiphy_workflow::StratiphyResult> for StratiphyResult {
    type Error = String;

    fn try_from(
        value: crate::io::generated::stratiphy_workflow::StratiphyResult,
    ) -> Result<Self, Self::Error> {
        let clustering_result = if let Some(cwr) = value.clustering_result {
            ClusteringWorkflowResult::try_from(cwr)?
        } else {
            return Err("Missing clustering result".to_string());
        };

        let cohort = if let Some(cohort) = value.cohort {
            Cohort::try_from(cohort)?
        } else {
            return Err("Missing cohort".to_string());
        };

        let meta_data = if let Some(meta_data) = value.meta_data {
            ClusteringWorkflowMetadata::from(meta_data)
        } else {
            return Err("Missing meta_data".to_string());
        };

        Ok(StratiphyResult {
            clustering_result,
            cohort,
            meta_data,
        })
    }
}