single_rust 0.5.8

Single-cell analysis in Rust
Documentation
use crate::connectors::utils::download_resource;
use crate::shared::utils::dataframe_from_csv_bytes;
use anyhow::anyhow;
use polars::export::arrow::array::ViewType;
use polars::prelude::pivot::pivot;
use polars::prelude::{ChunkCompareEq, DataFrame, PlSmallStr};
use single_utilities::types::PathwayNetwork;
use tokio::runtime::Runtime;

/// URL prefix of the OmniPath annotations endpoint; `load_resource` appends the
/// resource name and a `&license=` query parameter to it.
const OMNIPATH_BASE_URL: &str = "https://omnipathdb.org/annotations?databases=";

/// License values accepted by `load_resource`; any other value is rejected
/// before a request is made.
const LICENSE_TYPES: [&str; 3] = ["academic", "commercial", "nonprofit"];

/// Downloads an OmniPath annotation resource and returns it as a processed `DataFrame`.
///
/// The request URL is `OMNIPATH_BASE_URL` followed by `name` and a
/// `&license=` query parameter. The response body is parsed as tab-separated
/// text and then handed to `process_omnipath_dataframe`.
///
/// # Arguments
///
/// * `name` - Name of the OmniPath database/resource (inserted into the URL as-is).
/// * `license` - One of the entries in `LICENSE_TYPES`.
/// * `tax_id` - Optional taxonomy identifier forwarded to `process_omnipath_dataframe`.
///
/// # Errors
///
/// Returns an error if `license` is not an allowed value, if the Tokio runtime
/// cannot be created, if the download fails, or if parsing/processing the
/// table fails.
///
/// # Panics
///
/// NOTE(review): this blocks on a freshly created Tokio runtime; Tokio's
/// `block_on` is documented to panic when called from within an asynchronous
/// execution context, so call this from synchronous code only.
pub fn load_resource(name: &str, license: &str, tax_id: Option<&str>) -> anyhow::Result<DataFrame> {
    if !LICENSE_TYPES.contains(&license) {
        return Err(anyhow!(
            "This license is not an allowed type, available are: academic, commercial, nonprofit."
        ));
    }

    let url = format!("{}{}&license={}", OMNIPATH_BASE_URL, name, license);

    // Runtime construction can fail (e.g. resource exhaustion); surface that
    // as an error instead of panicking inside a fallible API.
    let runtime =
        Runtime::new().map_err(|e| anyhow!("failed to create Tokio runtime: {}", e))?;
    let response = runtime.block_on(download_resource(&url))?;

    let raw_table = dataframe_from_csv_bytes(response.to_bytes(), b'\t', true, None)?;
    process_omnipath_dataframe(raw_table, tax_id)
}

/// Reshapes a long-format OmniPath annotation table into a wide table.
///
/// Steps:
/// 1. Keep only the `genesymbol`, `label`, `value` and `record_id` columns.
/// 2. Pivot so that every distinct `label` becomes its own column holding the
///    corresponding `value`, with one row per (`genesymbol`, `record_id`).
/// 3. If `tax_id` is given and the pivoted table has an `ncbi_tax_id` column,
///    keep only the rows whose `ncbi_tax_id` equals `tax_id`.
/// 4. Drop the `record_id`, `entity_type` and `_entity_type` columns when present.
///
/// # Errors
///
/// Returns an error if a required column is missing, if the pivot fails, or
/// if `ncbi_tax_id` exists but is not a string column.
fn process_omnipath_dataframe(
    df: DataFrame,
    tax_id: Option<&str>,
) -> anyhow::Result<DataFrame> {
    const TAX_ID_COLUMN: &str = "ncbi_tax_id";
    const COLUMNS_TO_DROP: [&str; 3] = ["record_id", "entity_type", "_entity_type"];

    let long = df.select(["genesymbol", "label", "value", "record_id"])?;

    // `pivot` takes (on, index, values): the distinct entries of `label` must
    // become the new column names and `value` must fill the cells, otherwise
    // label-named columns such as `ncbi_tax_id` or `cell_type` never exist.
    let mut wide = pivot(
        &long,
        ["label"],
        Some(["genesymbol", "record_id"]),
        Some(["value"]),
        false,
        None,
        None,
    )?;

    if let Some(org) = tax_id {
        // Check for the taxonomy column itself (not for a column named after
        // the requested tax id) so the filter is applied when it can be.
        let tax_col: PlSmallStr = TAX_ID_COLUMN.into();
        if wide.get_column_names().contains(&&tax_col) {
            let mask = wide.column(TAX_ID_COLUMN)?.str()?.equal(org);
            wide = wide.filter(&mask)?;
        }
    }

    // These bookkeeping columns are optional in the pivoted table, so only
    // drop the ones that are actually there.
    for col in COLUMNS_TO_DROP {
        let col_name: PlSmallStr = col.into();
        if wide.get_column_names().contains(&&col_name) {
            wide = wide.drop(col)?;
        }
    }

    Ok(wide)
}

/// Builds a `PathwayNetwork` from the PanglaoDB resource hosted on OmniPath.
///
/// Loads the `panglaodb` resource via `load_resource`, then pairs each row's
/// `cell_type` (used as the source name) with its `genesymbol` (used as the
/// target). Rows where either value is null are skipped as a whole so that
/// names and targets always stay aligned row-by-row.
///
/// # Arguments
///
/// * `license` - License type passed to `load_resource`.
/// * `tax_id` - Optional taxonomy identifier passed to `load_resource`.
/// * `features` - Forwarded unchanged to `PathwayNetwork::new_from_vec`.
/// * `tmin` - Forwarded unchanged to `PathwayNetwork::new_from_vec`.
///
/// # Errors
///
/// Returns an error if the resource cannot be loaded or if the `cell_type` /
/// `genesymbol` columns are missing or are not string columns.
pub fn construct_network_from_panglaodb(
    license: &str,
    tax_id: Option<&str>,
    features: Vec<String>,
    tmin: u32,
) -> anyhow::Result<PathwayNetwork> {
    let df = load_resource("panglaodb", license, tax_id)?;

    let cell_types = df.column("cell_type")?.str()?;
    let gene_symbols = df.column("genesymbol")?.str()?;

    // Filter per row, not per column: dropping nulls from each column
    // independently would shift one vector relative to the other and pair
    // cell types with the wrong genes.
    let (names, targets): (Vec<String>, Vec<String>) = cell_types
        .into_iter()
        .zip(gene_symbols.into_iter())
        .filter_map(|(name, target)| Some((name?.to_string(), target?.to_string())))
        .unzip();

    Ok(PathwayNetwork::new_from_vec(
        names, targets, None, features, tmin,
    ))
}