kira-spliceqc 0.2.0

Deterministic, explainable splicing QC for single-cell expression data.
Documentation
use std::path::{Path, PathBuf};

use tracing::{error, info, warn};

use crate::cli::config::RunMode;
use crate::input::detect::{DetectedInput, detect_input};
use crate::input::error::InputError;
use crate::input::shared_cache::validate_dimensions;
use crate::input::{InputDescriptor, InputKind, OrganelleCacheInput};
use crate::input::{h5ad, tenx};

pub fn run_stage0(
    path: &Path,
    run_mode: RunMode,
    cache_override: Option<&Path>,
) -> Result<InputDescriptor, InputError> {
    if run_mode == RunMode::Pipeline {
        if let Some(cache_path) = cache_override {
            return descriptor_from_cache(path, cache_path.to_path_buf(), None);
        }
        if let Some(descriptor) = try_pipeline_cache(path)? {
            return Ok(descriptor);
        }
    }

    let result = match detect_input(path)? {
        DetectedInput::TenX(paths) => {
            info!("detected input format: 10x");
            let validation = tenx::validate(paths)?;
            info!(
                n_genes = validation.n_genes,
                n_cells = validation.n_cells,
                "input dimensions"
            );
            Ok(InputDescriptor {
                kind: InputKind::TenX(validation.input),
                n_genes: validation.n_genes,
                n_cells: validation.n_cells,
                has_multiple_samples: validation.has_multiple_samples,
                has_metadata: validation.has_metadata,
            })
        }
        DetectedInput::H5AD(path) => {
            info!("detected input format: h5ad");
            let validation = h5ad::validate(&path)?;
            info!(
                n_genes = validation.n_genes,
                n_cells = validation.n_cells,
                "input dimensions"
            );
            Ok(InputDescriptor {
                kind: InputKind::H5AD(validation.input),
                n_genes: validation.n_genes,
                n_cells: validation.n_cells,
                has_multiple_samples: validation.has_multiple_samples,
                has_metadata: validation.has_metadata,
            })
        }
    };

    if let Err(ref err) = result {
        error!(error = ?err, "stage0 input validation failed");
    }

    result
}

fn try_pipeline_cache(path: &Path) -> Result<Option<InputDescriptor>, InputError> {
    let meta = std::fs::metadata(path).map_err(|e| InputError::io(path, e))?;
    if !meta.is_dir() {
        return Ok(None);
    }

    let prefix = crate::input::detect::detect_prefix(path)?;
    let cache_name = crate::input::detect::resolve_shared_cache_filename(prefix.as_deref());
    let cache_path = path.join(cache_name);

    if !cache_path.exists() {
        warn!(
            expected_cache_path = %cache_path.display(),
            "shared cache not found in pipeline mode, falling back to MatrixMarket input"
        );
        return Ok(None);
    }

    Ok(Some(descriptor_from_cache(path, cache_path, prefix)?))
}

fn descriptor_from_cache(
    root: &Path,
    cache_path: PathBuf,
    prefix: Option<String>,
) -> Result<InputDescriptor, InputError> {
    let (n_genes, n_cells) = validate_dimensions(&cache_path)?;
    info!("detected input format: shared-cache");
    info!(n_genes = n_genes, n_cells = n_cells, "input dimensions");

    Ok(InputDescriptor {
        kind: InputKind::OrganelleCache(OrganelleCacheInput {
            root: root.to_path_buf(),
            cache_path,
            prefix,
        }),
        n_genes,
        n_cells,
        has_multiple_samples: false,
        has_metadata: root.join("metadata.tsv").exists() || root.join("metadata.tsv.gz").exists(),
    })
}