use crate::cli::Strandedness;
use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use std::path::Path;
/// Root of the YAML configuration file.
///
/// The container-level `#[serde(default)]` fills every key that is missing
/// from the input with the matching field of `Config::default()`, so the
/// `rna` section may be left out entirely without a per-field attribute.
#[derive(Debug, Default, Deserialize)]
#[serde(default)]
pub struct Config {
    /// Settings read from the top-level `rna` key.
    pub rna: RnaConfig,
}
/// Settings of the `rna` section.
///
/// Every key is optional. The container-level `#[serde(default)]` fills any
/// missing key from `RnaConfig::default()`, which (being derived) is each
/// field type's own `Default`; for the per-tool sections that is their
/// hand-written `Default` impl. Unknown keys are ignored.
#[derive(Debug, Default, Deserialize)]
#[serde(default)]
pub struct RnaConfig {
    /// Optional chromosome-name prefix (exposed via `chromosome_prefix()`).
    pub chromosome_prefix: Option<String>,
    /// Explicit chromosome-name mapping; keys are GTF names and values are
    /// alignment names (see `alignment_to_gtf_mapping`).
    pub chromosome_mapping: HashMap<String, String>,
    /// Library strandedness, `None` when the key is absent.
    pub stranded: Option<Strandedness>,
    /// Value of the `paired` key, `None` when the key is absent.
    pub paired: Option<bool>,
    /// Value of the `sample_name` key, `None` when the key is absent.
    pub sample_name: Option<String>,
    /// Value of the `flat_output` key; `false` when the key is absent.
    pub flat_output: bool,

    /// `dupradar` section.
    pub dupradar: DupradarConfig,
    /// `featurecounts` section.
    pub featurecounts: FeatureCountsConfig,
    /// `bam_stat` section.
    pub bam_stat: BamStatConfig,
    /// `infer_experiment` section.
    pub infer_experiment: InferExperimentConfig,
    /// `read_duplication` section.
    pub read_duplication: ReadDuplicationConfig,
    /// `read_distribution` section.
    pub read_distribution: ReadDistributionConfig,
    /// `junction_annotation` section.
    pub junction_annotation: JunctionAnnotationConfig,
    /// `junction_saturation` section.
    pub junction_saturation: JunctionSaturationConfig,
    /// `inner_distance` section.
    pub inner_distance: InnerDistanceConfig,
    /// `flagstat` section.
    pub flagstat: FlagstatConfig,
    /// `idxstats` section.
    pub idxstats: IdxstatsConfig,
    /// `tin` section.
    pub tin: TinConfig,
    /// `samtools_stats` section.
    pub samtools_stats: SamtoolsStatsConfig,
    /// `preseq` section.
    pub preseq: PreseqConfig,
    /// `qualimap` section.
    pub qualimap: QualimapConfig,
}
/// Output switches of the `dupradar` section.
///
/// `#[serde(default)]` on the struct means any key missing from the input is
/// taken from this type's `Default` impl, which turns every switch on.
/// `RnaConfig::any_dupradar_output` reports whether at least one is set.
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct DupradarConfig {
/// Switch for the `dup_matrix` output.
pub dup_matrix: bool,
/// Switch for the `intercept_slope` output.
pub intercept_slope: bool,
/// Switch for the `density_scatter_plot` output.
pub density_scatter_plot: bool,
/// Switch for the `boxplot` output.
pub boxplot: bool,
/// Switch for the `expression_histogram` output.
pub expression_histogram: bool,
/// Switch for the `multiqc_intercept` output.
pub multiqc_intercept: bool,
/// Switch for the `multiqc_curve` output.
pub multiqc_curve: bool,
}
impl Default for DupradarConfig {
fn default() -> Self {
Self {
dup_matrix: true,
intercept_slope: true,
density_scatter_plot: true,
boxplot: true,
expression_histogram: true,
multiqc_intercept: true,
multiqc_curve: true,
}
}
}
/// Settings of the `featurecounts` section.
///
/// `#[serde(default)]` on the struct means any key missing from the input is
/// taken from this type's `Default` impl (all switches on, attribute
/// `"gene_biotype"`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct FeatureCountsConfig {
/// Switch for the counts file; checked by `RnaConfig::any_featurecounts_output`.
pub counts_file: bool,
/// Switch for the summary file; checked by `RnaConfig::any_featurecounts_output`.
pub summary_file: bool,
/// Switch for biotype counts; checked by `RnaConfig::any_biotype_output`.
pub biotype_counts: bool,
/// Switch for the `biotype_counts_mqc` output; checked by `RnaConfig::any_biotype_output`.
pub biotype_counts_mqc: bool,
/// Switch for the `biotype_rrna_mqc` output; checked by `RnaConfig::any_biotype_output`.
pub biotype_rrna_mqc: bool,
/// Value of the `biotype_attribute` key.
// NOTE(review): presumably the GTF attribute used to group features by biotype - confirm against the consumer.
pub biotype_attribute: String,
}
impl Default for FeatureCountsConfig {
fn default() -> Self {
Self {
counts_file: true,
summary_file: true,
biotype_counts: true,
biotype_counts_mqc: true,
biotype_rrna_mqc: true,
biotype_attribute: "gene_biotype".to_string(),
}
}
}
/// Settings of the `bam_stat` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct BamStatConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for BamStatConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `infer_experiment` section.
///
/// Missing keys are taken from this type's `Default` impl
/// (`enabled: true`, `sample_size: None`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct InferExperimentConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
/// Optional `sample_size` override; `None` when the key is absent.
// NOTE(review): presumably the number of reads to sample - confirm against the consumer.
pub sample_size: Option<u64>,
}
impl Default for InferExperimentConfig {
fn default() -> Self {
Self {
enabled: true,
sample_size: None,
}
}
}
/// Settings of the `read_duplication` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct ReadDuplicationConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for ReadDuplicationConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `read_distribution` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct ReadDistributionConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for ReadDistributionConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `junction_annotation` section.
///
/// Missing keys are taken from this type's `Default` impl
/// (`enabled: true`, `min_intron: None`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct JunctionAnnotationConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
/// Optional `min_intron` override; `None` when the key is absent.
// NOTE(review): presumably a minimum intron length - units and semantics to be confirmed against the consumer.
pub min_intron: Option<u64>,
}
impl Default for JunctionAnnotationConfig {
fn default() -> Self {
Self {
enabled: true,
min_intron: None,
}
}
}
/// Settings of the `junction_saturation` section.
///
/// Missing keys are taken from this type's `Default` impl
/// (`enabled: true`, every optional parameter `None`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct JunctionSaturationConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
/// Optional `min_coverage` override; `None` when the key is absent.
pub min_coverage: Option<u64>,
/// Optional `percentile_floor` override; `None` when the key is absent.
pub percentile_floor: Option<u64>,
/// Optional `percentile_ceiling` override; `None` when the key is absent.
pub percentile_ceiling: Option<u64>,
/// Optional `percentile_step` override; `None` when the key is absent.
pub percentile_step: Option<u64>,
/// Optional `seed` override; `None` when the key is absent.
// NOTE(review): presumably seeds the subsampling RNG - confirm against the consumer.
pub seed: Option<u64>,
}
impl Default for JunctionSaturationConfig {
fn default() -> Self {
Self {
enabled: true,
min_coverage: None,
percentile_floor: None,
percentile_ceiling: None,
percentile_step: None,
seed: None,
}
}
}
/// Settings of the `inner_distance` section.
///
/// Missing keys are taken from this type's `Default` impl
/// (`enabled: true`, every optional parameter `None`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct InnerDistanceConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
/// Optional `sample_size` override; `None` when the key is absent.
pub sample_size: Option<u64>,
/// Optional `lower_bound` override (signed, may be negative); `None` when absent.
pub lower_bound: Option<i64>,
/// Optional `upper_bound` override (signed); `None` when absent.
pub upper_bound: Option<i64>,
/// Optional `step` override (signed); `None` when absent.
// NOTE(review): presumably the histogram bin width - confirm against the consumer.
pub step: Option<i64>,
}
impl Default for InnerDistanceConfig {
fn default() -> Self {
Self {
enabled: true,
sample_size: None,
lower_bound: None,
upper_bound: None,
step: None,
}
}
}
/// Settings of the `flagstat` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct FlagstatConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for FlagstatConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `tin` section.
///
/// Missing keys are taken from this type's `Default` impl
/// (`enabled: true`, every optional parameter `None`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct TinConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
/// Optional `sample_size` override; `None` when the key is absent.
pub sample_size: Option<u32>,
/// Optional `min_coverage` override; `None` when the key is absent.
pub min_coverage: Option<u32>,
/// Optional `seed` override; `None` when the key is absent.
pub seed: Option<u64>,
}
impl Default for TinConfig {
fn default() -> Self {
Self {
enabled: true,
sample_size: None,
min_coverage: None,
seed: None,
}
}
}
/// Settings of the `qualimap` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct QualimapConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for QualimapConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `idxstats` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct IdxstatsConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for IdxstatsConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `samtools_stats` section.
///
/// A missing `enabled` key is taken from this type's `Default` impl (`true`).
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct SamtoolsStatsConfig {
/// Value of the `enabled` key; how it is consumed is outside this file.
pub enabled: bool,
}
impl Default for SamtoolsStatsConfig {
fn default() -> Self {
Self { enabled: true }
}
}
/// Settings of the `preseq` section.
///
/// Unlike the other tool sections, the parameters here are plain values
/// rather than `Option`s: a key missing from the input is taken from this
/// type's `Default` impl, which supplies a concrete number for each one.
#[derive(Debug, Deserialize)]
#[serde(default)]
pub struct PreseqConfig {
/// Value of the `enabled` key (default `true`); how it is consumed is outside this file.
pub enabled: bool,
/// Value of the `max_extrap` key (default `1e10`).
pub max_extrap: f64,
/// Value of the `step_size` key (default `1e6`).
pub step_size: f64,
/// Value of the `n_bootstraps` key (default `100`).
pub n_bootstraps: u32,
/// Value of the `confidence_level` key (default `0.95`).
// NOTE(review): no range validation happens in this file; presumably expected in (0, 1) - confirm.
pub confidence_level: f64,
/// Value of the `seed` key (default `408`).
pub seed: u64,
/// Value of the `max_terms` key (default `100`).
pub max_terms: usize,
/// Value of the `max_segment_length` key (default `100_000_000`).
pub max_segment_length: i64,
/// Value of the `defects` key (default `false`).
pub defects: bool,
}
impl Default for PreseqConfig {
fn default() -> Self {
Self {
enabled: true,
max_extrap: 1e10,
step_size: 1e6,
n_bootstraps: 100,
confidence_level: 0.95,
seed: 408,
max_terms: 100,
max_segment_length: 100_000_000,
defects: false,
}
}
}
impl Config {
    /// Reads the file at `path` and deserializes it as a YAML [`Config`].
    ///
    /// # Errors
    ///
    /// Returns an error, annotated with the offending path, when the file
    /// cannot be read or when its contents do not deserialize into `Config`.
    pub fn from_file(path: &Path) -> Result<Self> {
        let shown = path.display();
        let raw = std::fs::read_to_string(path)
            .with_context(|| format!("Failed to read config file: {}", shown))?;
        serde_yaml_ng::from_str::<Self>(&raw)
            .with_context(|| format!("Failed to parse config file: {}", shown))
    }
}
impl RnaConfig {
    /// Returns the inverse of `chromosome_mapping`: alignment name -> GTF name.
    ///
    /// `chromosome_mapping` is keyed by GTF name, so several GTF names may
    /// point at the same alignment name. `HashMap` iteration order is
    /// randomized, so a plain "insert and overwrite" inversion would pick a
    /// different winner from run to run. Collisions are therefore resolved
    /// deterministically: the lexicographically smallest GTF name is kept.
    /// For a one-to-one mapping the result is the exact inverse.
    pub fn alignment_to_gtf_mapping(&self) -> HashMap<String, String> {
        let mut inverse: HashMap<String, String> =
            HashMap::with_capacity(self.chromosome_mapping.len());
        for (gtf_name, aln_name) in &self.chromosome_mapping {
            let kept = inverse
                .entry(aln_name.clone())
                .or_insert_with(|| gtf_name.clone());
            // Order-independent tie-break for duplicated alignment names.
            if *gtf_name < *kept {
                kept.clone_from(gtf_name);
            }
        }
        inverse
    }

    /// Returns the configured chromosome prefix as a string slice, if any.
    pub fn chromosome_prefix(&self) -> Option<&str> {
        self.chromosome_prefix.as_deref()
    }

    /// Returns `true` when an explicit mapping or a prefix is configured.
    pub fn has_chromosome_mapping(&self) -> bool {
        !self.chromosome_mapping.is_empty() || self.chromosome_prefix.is_some()
    }

    /// Returns `true` when the counts file or the summary file is switched on.
    pub fn any_featurecounts_output(&self) -> bool {
        let fc = &self.featurecounts;
        fc.counts_file || fc.summary_file
    }

    /// Returns `true` when at least one biotype output is switched on.
    pub fn any_biotype_output(&self) -> bool {
        let fc = &self.featurecounts;
        fc.biotype_counts || fc.biotype_counts_mqc || fc.biotype_rrna_mqc
    }

    /// Returns `true` when at least one `dupradar` output is switched on.
    pub fn any_dupradar_output(&self) -> bool {
        let dr = &self.dupradar;
        dr.dup_matrix
            || dr.intercept_slope
            || dr.density_scatter_plot
            || dr.boxplot
            || dr.expression_histogram
            || dr.multiqc_intercept
            || dr.multiqc_curve
    }
}
#[cfg(test)]
mod tests {
    //! Deserialization tests for `Config` and `RnaConfig`.
    //!
    //! The YAML fixtures are raw strings, so the whitespace inside them is
    //! part of the document: nested keys MUST be indented under their parent,
    //! otherwise they parse as unrelated top-level keys and the section they
    //! were meant to configure silently keeps its defaults.
    use super::*;

    /// The `rna` wrapper key routes nested settings into `Config::rna`.
    #[test]
    fn test_top_level_rna_wrapper() {
        let yaml = r#"
rna:
  chromosome_prefix: "chr"
  stranded: reverse
  paired: true
  bam_stat:
    enabled: false
"#;
        let config: Config = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.rna.chromosome_prefix, Some("chr".to_string()));
        assert_eq!(config.rna.stranded, Some(Strandedness::Reverse));
        assert_eq!(config.rna.paired, Some(true));
        assert!(!config.rna.bam_stat.enabled);
    }

    /// An empty document yields the built-in defaults.
    #[test]
    fn test_empty_top_level_config() {
        let config: Config = serde_yaml_ng::from_str("").unwrap();
        assert!(config.rna.chromosome_mapping.is_empty());
        assert!(config.rna.bam_stat.enabled);
        assert!(config.rna.preseq.enabled);
    }

    /// Unknown top-level sections are ignored rather than rejected.
    #[test]
    fn test_unknown_top_level_fields_ignored() {
        let yaml = r#"
rna:
  chromosome_prefix: "chr"
future_subcommand:
  key: value
"#;
        let config: Config = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.rna.chromosome_prefix, Some("chr".to_string()));
    }

    /// Every `RnaConfig` default, checked against the `Default` impls.
    #[test]
    fn test_empty_rna_config() {
        let config: RnaConfig = serde_yaml_ng::from_str("").unwrap();
        assert!(config.chromosome_mapping.is_empty());
        assert!(!config.has_chromosome_mapping());
        assert_eq!(config.stranded, None);
        assert_eq!(config.paired, None);
        assert!(!config.flat_output);
        // Tool sections default to everything enabled.
        assert!(config.dupradar.dup_matrix);
        assert!(config.featurecounts.counts_file);
        assert_eq!(config.featurecounts.biotype_attribute, "gene_biotype");
        assert!(config.bam_stat.enabled);
        assert!(config.infer_experiment.enabled);
        assert!(config.read_duplication.enabled);
        assert!(config.read_distribution.enabled);
        assert!(config.junction_annotation.enabled);
        assert!(config.junction_saturation.enabled);
        assert!(config.inner_distance.enabled);
        assert!(config.flagstat.enabled);
        assert!(config.idxstats.enabled);
        assert!(config.tin.enabled);
        assert!(config.samtools_stats.enabled);
        assert!(config.qualimap.enabled);
        // Preseq carries concrete numeric defaults.
        assert!(config.preseq.enabled);
        assert!((config.preseq.max_extrap - 1e10).abs() < 1.0);
        assert!((config.preseq.step_size - 1e6).abs() < 1.0);
        assert_eq!(config.preseq.n_bootstraps, 100);
        assert!((config.preseq.confidence_level - 0.95).abs() < 1e-10);
        assert_eq!(config.preseq.seed, 408);
        assert_eq!(config.preseq.max_terms, 100);
        assert_eq!(config.preseq.max_segment_length, 100_000_000);
        assert!(!config.preseq.defects);
    }

    /// `stranded` / `paired` are `None` when absent and parsed when present.
    #[test]
    fn test_stranded_paired_config() {
        let config: RnaConfig = serde_yaml_ng::from_str("").unwrap();
        assert_eq!(config.stranded, None);
        assert_eq!(config.paired, None);

        let yaml = "stranded: reverse\npaired: true\n";
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.stranded, Some(Strandedness::Reverse));
        assert_eq!(config.paired, Some(true));

        let yaml = "stranded: unstranded\npaired: false\n";
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.stranded, Some(Strandedness::Unstranded));
        assert_eq!(config.paired, Some(false));

        let yaml = "stranded: forward\n";
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.stranded, Some(Strandedness::Forward));
    }

    /// `flat_output` round-trips both boolean values.
    #[test]
    fn test_flat_output_config() {
        let yaml = "flat_output: true\n";
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert!(config.flat_output);

        let yaml = "flat_output: false\n";
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert!(!config.flat_output);
    }

    /// The mapping is read as GTF name -> alignment name and can be inverted.
    #[test]
    fn test_chromosome_mapping() {
        let yaml = r#"
chromosome_mapping:
  chr1: "1"
  chr2: "2"
  chrX: "X"
  chrM: "MT"
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.chromosome_mapping.len(), 4);
        assert_eq!(config.chromosome_mapping.get("chr1").unwrap(), "1");
        assert_eq!(config.chromosome_mapping.get("chrM").unwrap(), "MT");

        let reverse = config.alignment_to_gtf_mapping();
        assert_eq!(reverse.get("1").unwrap(), "chr1");
        assert_eq!(reverse.get("MT").unwrap(), "chrM");
    }

    /// Unknown keys inside the RNA section are ignored rather than rejected.
    #[test]
    fn test_unknown_rna_fields_ignored() {
        let yaml = r#"
chromosome_mapping:
  chr1: "1"
future_setting: true
another_section:
  key: value
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.chromosome_mapping.len(), 1);
    }

    /// Nested tool sections override only the keys they mention.
    #[test]
    fn test_nested_tool_config() {
        let yaml = r#"
dupradar:
  dup_matrix: true
  boxplot: false
featurecounts:
  counts_file: true
  summary_file: false
  biotype_attribute: "gene_type"
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert!(config.dupradar.dup_matrix);
        assert!(!config.dupradar.boxplot);
        assert!(config.featurecounts.counts_file);
        assert!(!config.featurecounts.summary_file);
        assert_eq!(config.featurecounts.biotype_attribute, "gene_type");
    }

    /// Turning off every dupradar switch makes `any_dupradar_output` false.
    #[test]
    fn test_disable_all_dupradar() {
        let yaml = r#"
dupradar:
  dup_matrix: false
  intercept_slope: false
  density_scatter_plot: false
  boxplot: false
  expression_histogram: false
  multiqc_intercept: false
  multiqc_curve: false
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert!(!config.any_dupradar_output());
    }

    /// Each RSeQC-style section can be disabled independently.
    #[test]
    fn test_disable_rseqc_tools() {
        let yaml = r#"
bam_stat:
  enabled: false
infer_experiment:
  enabled: false
read_duplication:
  enabled: false
read_distribution:
  enabled: false
junction_annotation:
  enabled: false
junction_saturation:
  enabled: false
inner_distance:
  enabled: false
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert!(!config.bam_stat.enabled);
        assert!(!config.infer_experiment.enabled);
        assert!(!config.read_duplication.enabled);
        assert!(!config.read_distribution.enabled);
        assert!(!config.junction_annotation.enabled);
        assert!(!config.junction_saturation.enabled);
        assert!(!config.inner_distance.enabled);
    }

    /// Optional per-tool parameters are parsed into `Some(..)`.
    #[test]
    fn test_rseqc_tool_params() {
        let yaml = r#"
infer_experiment:
  enabled: true
  sample_size: 500000
junction_saturation:
  enabled: true
  min_coverage: 5
  percentile_floor: 10
  percentile_ceiling: 95
  percentile_step: 10
inner_distance:
  enabled: true
  sample_size: 2000000
  lower_bound: -500
  upper_bound: 500
  step: 10
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert_eq!(config.infer_experiment.sample_size, Some(500_000));
        assert_eq!(config.junction_saturation.min_coverage, Some(5));
        assert_eq!(config.junction_saturation.percentile_floor, Some(10));
        assert_eq!(config.junction_saturation.percentile_ceiling, Some(95));
        assert_eq!(config.junction_saturation.percentile_step, Some(10));
        assert_eq!(config.inner_distance.sample_size, Some(2_000_000));
        assert_eq!(config.inner_distance.lower_bound, Some(-500));
        assert_eq!(config.inner_distance.upper_bound, Some(500));
        assert_eq!(config.inner_distance.step, Some(10));
    }

    /// Every `preseq` parameter can be overridden from YAML.
    #[test]
    fn test_preseq_config() {
        let yaml = r#"
preseq:
  enabled: true
  seed: 1
  max_segment_length: 500000
  max_extrap: 5000000000
  step_size: 500000
  n_bootstraps: 50
  confidence_level: 0.99
  max_terms: 50
  defects: true
"#;
        let config: RnaConfig = serde_yaml_ng::from_str(yaml).unwrap();
        assert!(config.preseq.enabled);
        assert_eq!(config.preseq.seed, 1);
        assert_eq!(config.preseq.max_segment_length, 500_000);
        assert_eq!(config.preseq.max_extrap, 5_000_000_000.0);
        assert_eq!(config.preseq.step_size, 500_000.0);
        assert_eq!(config.preseq.n_bootstraps, 50);
        assert_eq!(config.preseq.confidence_level, 0.99);
        assert_eq!(config.preseq.max_terms, 50);
        assert!(config.preseq.defects);
    }
}