use std::io;
use crate::config::{Limits, LimitsExt};
use crate::modules::QCModule;
use crate::report::charts::quality_boxplot::{render_quality_boxplot, QualityBoxPlotData};
use crate::sequence::Sequence;
use crate::utils::base_group::BaseGroup;
use crate::utils::format::java_format_double;
use crate::utils::phred;
use crate::utils::quality_count::{self, QualityCount};
pub struct PerBaseQualityScores {
quality_counts: Vec<QualityCount>,
nogroup: bool,
expgroup: bool,
min_length: usize,
limits: Limits,
}
impl PerBaseQualityScores {
pub fn new(limits: &Limits, nogroup: bool, expgroup: bool, min_length: usize) -> Self {
PerBaseQualityScores {
quality_counts: Vec::new(),
nogroup,
expgroup,
min_length,
limits: limits.clone(),
}
}
fn calculate(&self) -> CalculatedData {
let (min_char, _max_char) = quality_count::calculate_offsets(&self.quality_counts);
let offset = phred::detect(min_char).map(|e| e.offset).unwrap_or(33);
let groups = BaseGroup::make_base_groups(
self.quality_counts.len(),
self.min_length,
self.nogroup,
self.expgroup,
);
let mut means = vec![0.0f64; groups.len()];
let mut medians = vec![0.0f64; groups.len()];
let mut lower_quartile = vec![0.0f64; groups.len()];
let mut upper_quartile = vec![0.0f64; groups.len()];
let mut lowest = vec![0.0f64; groups.len()];
let mut highest = vec![0.0f64; groups.len()];
let mut x_labels = Vec::with_capacity(groups.len());
for (i, group) in groups.iter().enumerate() {
x_labels.push(group.label());
let min_base = group.lower_count;
let max_base = group.upper_count;
lowest[i] = self.get_percentile(min_base, max_base, offset, 10);
highest[i] = self.get_percentile(min_base, max_base, offset, 90);
means[i] = self.get_mean(min_base, max_base, offset);
medians[i] = self.get_percentile(min_base, max_base, offset, 50);
lower_quartile[i] = self.get_percentile(min_base, max_base, offset, 25);
upper_quartile[i] = self.get_percentile(min_base, max_base, offset, 75);
}
CalculatedData {
means,
medians,
lower_quartile,
upper_quartile,
lowest,
highest,
x_labels,
}
}
fn get_percentile(&self, min_base: usize, max_base: usize, offset: u8, percentile: u8) -> f64 {
let mut count = 0;
let mut total = 0.0;
for i in min_base..=max_base {
if self.quality_counts[i].get_total_count() > 100 {
count += 1;
total += self.quality_counts[i].get_percentile(offset, percentile);
}
}
if count > 0 {
total / count as f64
} else {
f64::NAN
}
}
fn get_mean(&self, min_base: usize, max_base: usize, offset: u8) -> f64 {
let mut count = 0;
let mut total = 0.0;
for i in min_base..=max_base {
if self.quality_counts[i].get_total_count() > 0 {
count += 1;
total += self.quality_counts[i].get_mean(offset);
}
}
if count > 0 {
total / count as f64
} else {
0.0
}
}
}
impl PerBaseQualityScores {
fn build_chart_svg(&self) -> String {
let data = self.calculate();
let (min_char, max_char) = quality_count::calculate_offsets(&self.quality_counts);
let (offset, encoding_name) = phred::detect(min_char)
.map(|e| (e.offset, e.name))
.unwrap_or((33, "Sanger / Illumina 1.9"));
let title = format!(
"Quality scores across all bases ({} encoding)",
encoding_name
);
let mut max_y = (max_char as i32 - offset as i32).max(0) as f64;
if max_y < 35.0 {
max_y = 35.0;
}
let y_interval = 2.0;
let min_y = 0.0;
render_quality_boxplot(&QualityBoxPlotData {
means: data.means,
medians: data.medians,
lower_quartile: data.lower_quartile,
upper_quartile: data.upper_quartile,
lowest: data.lowest,
highest: data.highest,
min_y,
max_y,
y_interval,
x_labels: data.x_labels,
title,
})
}
}
impl QCModule for PerBaseQualityScores {
fn process_sequence(&mut self, sequence: &Sequence) {
let qual = &sequence.quality;
if self.quality_counts.len() < qual.len() {
self.quality_counts
.resize_with(qual.len(), QualityCount::new);
}
for (i, &q) in qual.iter().enumerate() {
self.quality_counts[i].add_value(q);
}
}
fn name(&self) -> &str {
"Per base sequence quality"
}
fn description(&self) -> &str {
"Shows the Quality scores of all bases at a given position in a sequencing run"
}
fn reset(&mut self) {
self.quality_counts.clear();
}
fn raises_error(&self) -> bool {
let data = self.calculate();
let lq_error = self.limits.threshold("quality_base_lower\terror", 5.0);
let median_error = self.limits.threshold("quality_base_median\terror", 20.0);
for i in 0..data.lower_quartile.len() {
if data.lower_quartile[i].is_nan() {
continue;
}
if data.lower_quartile[i] < lq_error || data.medians[i] < median_error {
return true;
}
}
false
}
fn raises_warning(&self) -> bool {
let data = self.calculate();
let lq_warn = self.limits.threshold("quality_base_lower\twarn", 10.0);
let median_warn = self.limits.threshold("quality_base_median\twarn", 25.0);
for i in 0..data.lower_quartile.len() {
if data.lower_quartile[i].is_nan() {
continue;
}
if data.lower_quartile[i] < lq_warn || data.medians[i] < median_warn {
return true;
}
}
false
}
fn ignore_filtered_sequences(&self) -> bool {
true
}
fn ignore_in_report(&self) -> bool {
self.limits.is_ignored("quality_base") || self.quality_counts.is_empty()
}
fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
let data = self.calculate();
writeln!(
writer,
"#Base\tMean\tMedian\tLower Quartile\tUpper Quartile\t10th Percentile\t90th Percentile"
)?;
for i in 0..data.means.len() {
writeln!(
writer,
"{}\t{}\t{}\t{}\t{}\t{}\t{}",
data.x_labels[i],
java_format_double(data.means[i]),
java_format_double(data.medians[i]),
java_format_double(data.lower_quartile[i]),
java_format_double(data.upper_quartile[i]),
java_format_double(data.lowest[i]),
java_format_double(data.highest[i]),
)?;
}
Ok(())
}
fn chart_image_name(&self) -> Option<&str> {
Some("per_base_quality")
}
fn chart_alt_text(&self) -> Option<&str> {
Some("Per base quality graph")
}
fn generate_chart_svg(&self) -> Option<String> {
Some(self.build_chart_svg())
}
}
struct CalculatedData {
means: Vec<f64>,
medians: Vec<f64>,
lower_quartile: Vec<f64>,
upper_quartile: Vec<f64>,
lowest: Vec<f64>,
highest: Vec<f64>,
x_labels: Vec<String>,
}