fastqrab-steps 0.9.1

Pipeline building blocks for fastqrab: read transformations, filters, reports, and demultiplexing
Documentation
use std::cell::RefCell;

use super::super::reports::common::{PHRED33OFFSET, Q_LOOKUP};
use super::extract_numeric_tags_plus_all;
use crate::transformations::prelude::*;
use fastqrab_io::io::WrappedFastQRead;

const PHRED33_MAX: u8 = 126;

#[derive(Clone, Copy, JsonSchema)]
#[tpd]
#[derive(Debug)]
pub enum ExpectedErrorAggregate {
    Sum,
    Max,
}

/// Calculate expected error from (sanger, 33 based) PHRED
#[derive(Clone, JsonSchema)]
#[tpd]
#[derive(Debug)]
pub struct ExpectedError {
    pub out_label: TagLabel,

    #[schemars(with = "String")]
    #[tpd(adapt_in_verify(String))]
    pub segment: SegmentIndexOrAll,

    pub aggregate: ExpectedErrorAggregate,
}

impl VerifyIn<PartialConfig> for PartialExpectedError {
    fn verify(
        &mut self,
        parent: &PartialConfig,
        _options: &VerifyOptions,
    ) -> std::result::Result<(), ValidationFailure>
    where
        Self: Sized + toml_pretty_deser::Visitor,
    {
        self.segment.or(SegmentIndexOrAll::All);
        self.segment.validate_segment(parent);
        Ok(())
    }
}

impl TagUser for PartialTaggedVariant<PartialExpectedError> {
    fn get_tag_usage(
        &mut self,
        _tags_available: &IndexMap<TagLabel, TagMetadata>,
        _segment_order: &[String],
    ) -> Option<TagUsageInfo<'_>> {
        if let Some(inner) = self.toml_value.value.as_mut() {
            Some(TagUsageInfo {
                declared_tag: inner.out_label.to_declared_tag(TagValueType::Numeric((
                    Some(NonNaN::new(0.0).expect("can't fail")),
                    None,
                ))),
                ..Default::default()
            })
        } else {
            None // cov:excl-line
        }
    }
}

impl Step for ExpectedError {
    fn apply(
        &self,
        mut block: FastQBlocksCombined,
        _input_info: &InputInfo,
        _demultiplex_info: &OptDemultiplex,
    ) -> anyhow::Result<(FastQBlocksCombined, bool)> {
        let error_state: RefCell<Option<anyhow::Error>> = RefCell::new(None);

        let aggregate = self.aggregate;

        extract_numeric_tags_plus_all(
            self.segment,
            &self.out_label,
            |read| {
                if error_state.borrow().is_some() {
                    return f64::NAN; // cov:excl-line
                }
                match expected_error_for_read(read, aggregate) {
                    Ok(value) => value,
                    Err(err) => {
                        *error_state.borrow_mut() = Some(err);
                        f64::NAN
                    }
                }
            },
            |reads| {
                if error_state.borrow().is_some() {
                    return f64::NAN; // cov:excl-line
                }
                let mut aggregated_value = 0.0;
                for read in reads {
                    match expected_error_for_read(read, aggregate) {
                        Ok(value) => match aggregate {
                            ExpectedErrorAggregate::Sum => {
                                aggregated_value += value;
                            }
                            ExpectedErrorAggregate::Max => {
                                aggregated_value = aggregated_value.max(value);
                            }
                        },
                        Err(err) => {
                            *error_state.borrow_mut() = Some(err);
                            return f64::NAN;
                        }
                    }
                }
                aggregated_value
            },
            &mut block,
        );

        match error_state.into_inner() {
            Some(err) => Err(err),
            None => Ok((block, true)),
        }
    }
}

fn expected_error_for_read(
    read: &WrappedFastQRead,
    aggregate: ExpectedErrorAggregate,
) -> anyhow::Result<f64> {
    let mut agg = 0.0;

    for &quality in read.qual() {
        if !(PHRED33OFFSET..=PHRED33_MAX).contains(&quality) {
            let quality_display = BString::from(vec![quality]);
            let read_name = BString::from(read.name().to_vec());
            anyhow::bail!(
                "CalcExpectedError requires PHRED+33 encoded qualities (ASCII 33..=126). Observed byte {quality} ('{}') in read '{}'. Consider running ConvertQuality before CalcExpectedError.",
                quality_display.escape_ascii(),
                read_name.escape_ascii()
            );
        }
        let expected_error = Q_LOOKUP[quality as usize];
        match aggregate {
            ExpectedErrorAggregate::Sum => {
                agg += expected_error;
            }
            ExpectedErrorAggregate::Max => {
                agg = expected_error.max(agg);
            }
        }
    }

    Ok(agg)
}