use std::io;
use crate::config::{Limits, LimitsExt};
use crate::modules::QCModule;
use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
use crate::sequence::Sequence;
use crate::utils::format::java_format_double;
pub struct SequenceLengthDistribution {
length_counts: Vec<u64>,
limits: Limits,
nogroup: bool,
computed: Option<ComputedDistribution>,
}
struct ComputedDistribution {
graph_counts: Vec<f64>,
x_categories: Vec<String>,
}
impl SequenceLengthDistribution {
pub fn new(limits: &Limits, nogroup: bool) -> Self {
SequenceLengthDistribution {
length_counts: Vec::new(),
limits: limits.clone(),
nogroup,
computed: None,
}
}
fn get_size_distribution(min: usize, max: usize, nogroup: bool) -> (usize, usize) {
if nogroup {
return (min, 1);
}
let mut base: usize = 1;
let divisions = [1, 2, 5];
let interval;
'outer: loop {
for &d in &divisions {
let tester = base * d;
if (max - min) / tester <= 50 {
interval = tester;
break 'outer;
}
}
base *= 10;
}
let basic_division = min / interval;
let starting = basic_division * interval;
(starting, interval)
}
fn calculate_distribution(&mut self) {
if self.computed.is_some() {
return;
}
let mut max_len: usize = 0;
let mut min_len: Option<usize> = None;
for i in 0..self.length_counts.len() {
if self.length_counts[i] > 0 {
if min_len.is_none() {
min_len = Some(i);
}
max_len = i;
}
}
let mut min_len = min_len.unwrap_or(0);
min_len = min_len.saturating_sub(1);
max_len += 1;
let (starting, interval) = Self::get_size_distribution(min_len, max_len, self.nogroup);
let mut categories = 0;
let mut current_value = starting;
while current_value <= max_len {
categories += 1;
current_value += interval;
}
let mut graph_counts = vec![0.0f64; categories];
let mut x_categories = Vec::with_capacity(categories);
for (i, gc) in graph_counts.iter_mut().enumerate() {
let min_value = starting + (interval * i);
let mut max_value = (starting + (interval * (i + 1))) - 1;
if max_value > max_len {
max_value = max_len;
}
for bp in min_value..=max_value {
if bp < self.length_counts.len() {
*gc += self.length_counts[bp] as f64;
}
}
if interval == 1 {
x_categories.push(format!("{}", min_value));
} else {
x_categories.push(format!("{}-{}", min_value, max_value));
}
}
self.computed = Some(ComputedDistribution {
graph_counts,
x_categories,
});
}
fn ensure_calculated(&self) -> &ComputedDistribution {
static DEFAULT: ComputedDistribution = ComputedDistribution {
graph_counts: Vec::new(),
x_categories: Vec::new(),
};
self.computed.as_ref().unwrap_or(&DEFAULT)
}
}
impl SequenceLengthDistribution {
fn build_chart_svg(&self) -> String {
let computed = self.ensure_calculated();
let max_val = computed
.graph_counts
.iter()
.cloned()
.fold(0.0_f64, f64::max);
let max_y = max_val;
render_line_graph(&LineGraphData {
data: vec![computed.graph_counts.clone()],
min_y: 0.0,
max_y,
x_label: "Sequence Length (bp)".to_string(),
series_names: vec!["Sequence Length".to_string()],
x_categories: computed.x_categories.clone(),
title: "Distribution of sequence lengths over all sequences".to_string(),
})
}
}
impl QCModule for SequenceLengthDistribution {
fn process_sequence(&mut self, sequence: &Sequence) {
self.computed = None;
let seq_len = sequence.sequence.len();
if seq_len + 2 > self.length_counts.len() {
self.length_counts.resize(seq_len + 2, 0);
}
self.length_counts[seq_len] += 1;
}
fn name(&self) -> &str {
"Sequence Length Distribution"
}
fn description(&self) -> &str {
"Shows the distribution of sequence length over all sequences"
}
fn reset(&mut self) {
self.length_counts.clear();
self.computed = None;
}
fn finalize(&mut self) {
self.calculate_distribution();
}
fn raises_error(&self) -> bool {
let threshold = self.limits.threshold("sequence_length\terror", 1.0);
if threshold == 0.0 {
return false;
}
if !self.length_counts.is_empty() && self.length_counts[0] > 0 {
return true;
}
false
}
fn raises_warning(&self) -> bool {
let threshold = self.limits.threshold("sequence_length\twarn", 1.0);
if threshold == 0.0 {
return false;
}
let mut seen_length = false;
for &count in &self.length_counts {
if count > 0 {
if seen_length {
return true;
}
seen_length = true;
}
}
false
}
fn ignore_filtered_sequences(&self) -> bool {
true
}
fn ignore_in_report(&self) -> bool {
self.limits.is_ignored("sequence_length")
}
fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
let computed = self.ensure_calculated();
writeln!(writer, "#Length\tCount")?;
for i in 0..computed.x_categories.len() {
if (i == 0 || i == computed.x_categories.len() - 1) && computed.graph_counts[i] == 0.0 {
continue;
}
writeln!(
writer,
"{}\t{}",
computed.x_categories[i],
java_format_double(computed.graph_counts[i])
)?;
}
Ok(())
}
fn chart_image_name(&self) -> Option<&str> {
Some("sequence_length_distribution")
}
fn chart_alt_text(&self) -> Option<&str> {
Some("Sequence length distribution")
}
fn generate_chart_svg(&self) -> Option<String> {
Some(self.build_chart_svg())
}
}