use std::io;
use memchr::memmem;
use crate::config::{Limits, LimitsExt};
use crate::modules::QCModule;
use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
use crate::sequence::Sequence;
use crate::utils::base_group::BaseGroup;
use crate::utils::format::java_format_double;
struct Adapter {
name: String,
finder: memmem::Finder<'static>,
positions: Vec<u64>,
}
impl Adapter {
fn new(name: &str, sequence: &str) -> Self {
let seq_bytes = sequence.as_bytes().to_vec();
let finder = memmem::Finder::new(&seq_bytes).into_owned();
Adapter {
name: name.to_string(),
finder,
positions: vec![0; 1],
}
}
fn expand_length_to(&mut self, new_length: usize) {
let old_len = self.positions.len();
if new_length > old_len {
let last_val = if old_len > 0 {
self.positions[old_len - 1]
} else {
0
};
self.positions.resize(new_length, last_val);
}
}
fn increment_count(&mut self, position: usize) {
self.positions[position] += 1;
}
}
pub struct AdapterContent {
adapters: Vec<Adapter>,
longest_sequence: usize,
longest_adapter: usize,
total_count: u64,
limits: Limits,
nogroup: bool,
expgroup: bool,
min_length: usize,
computed: Option<ComputedEnrichment>,
}
struct ComputedEnrichment {
enrichments: Vec<Vec<f64>>,
x_labels: Vec<String>,
}
impl AdapterContent {
pub fn new(
limits: &Limits,
adapter_entries: &[(String, String)],
nogroup: bool,
expgroup: bool,
min_length: usize,
) -> Self {
let mut longest_adapter = 0;
let mut adapters = Vec::with_capacity(adapter_entries.len());
for (name, seq) in adapter_entries {
if seq.len() > longest_adapter {
longest_adapter = seq.len();
}
adapters.push(Adapter::new(name, seq));
}
AdapterContent {
adapters,
longest_sequence: 0,
longest_adapter,
total_count: 0,
limits: limits.clone(),
nogroup,
expgroup,
min_length,
computed: None,
}
}
fn calculate_enrichment(&mut self) {
if self.computed.is_some() {
return;
}
let mut max_length = 0;
for adapter in &self.adapters {
if adapter.positions.len() > max_length {
max_length = adapter.positions.len();
}
}
let groups =
BaseGroup::make_base_groups(max_length, self.min_length, self.nogroup, self.expgroup);
let x_labels: Vec<String> = groups.iter().map(|g| g.label()).collect();
let mut enrichments = vec![vec![0.0f64; groups.len()]; self.adapters.len()];
for (a, adapter) in self.adapters.iter().enumerate() {
let positions = &adapter.positions;
for (g, group) in groups.iter().enumerate() {
let lower = group.lower_count; let upper = group.upper_count;
for p in lower..=upper {
if p < positions.len() {
enrichments[a][g] +=
(positions[p] as f64 * 100.0) / self.total_count as f64;
}
}
enrichments[a][g] /= (upper - lower + 1) as f64;
}
}
self.computed = Some(ComputedEnrichment {
enrichments,
x_labels,
});
}
fn adapter_names(&self) -> Vec<String> {
self.adapters.iter().map(|a| a.name.clone()).collect()
}
fn ensure_calculated(&self) -> &ComputedEnrichment {
static DEFAULT: ComputedEnrichment = ComputedEnrichment {
enrichments: Vec::new(),
x_labels: Vec::new(),
};
self.computed.as_ref().unwrap_or(&DEFAULT)
}
}
impl AdapterContent {
fn build_chart_svg(&self) -> String {
let computed = self.ensure_calculated();
render_line_graph(&LineGraphData {
data: computed.enrichments.clone(),
min_y: 0.0,
max_y: 100.0,
x_label: "Position in read (bp)".to_string(),
series_names: self.adapter_names(),
x_categories: computed.x_labels.clone(),
title: "% Adapter".to_string(),
})
}
}
impl QCModule for AdapterContent {
fn process_sequence(&mut self, sequence: &Sequence) {
self.computed = None;
self.total_count += 1;
let seq_len = sequence.sequence.len();
if seq_len > self.longest_sequence && seq_len > self.longest_adapter {
self.longest_sequence = seq_len;
let new_len = (self.longest_sequence - self.longest_adapter) + 1;
for adapter in &mut self.adapters {
adapter.expand_length_to(new_len);
}
}
let seq_bytes = &sequence.sequence;
let max_pos = self.longest_sequence.saturating_sub(self.longest_adapter);
for adapter in &mut self.adapters {
if let Some(index) = adapter.finder.find(seq_bytes) {
for i in index..=max_pos {
if i < adapter.positions.len() {
adapter.increment_count(i);
}
}
}
}
}
fn finalize(&mut self) {
self.calculate_enrichment();
}
fn name(&self) -> &str {
"Adapter Content"
}
fn description(&self) -> &str {
"Searches for specific adapter sequences in a library"
}
fn reset(&mut self) {
self.total_count = 0;
self.longest_sequence = 0;
self.computed = None;
for adapter in &mut self.adapters {
adapter.positions = vec![0; 1];
}
}
fn raises_error(&self) -> bool {
let threshold = self.limits.threshold("adapter\terror", 10.0);
let computed = self.ensure_calculated();
computed
.enrichments
.iter()
.any(|enrichments| enrichments.iter().any(|&val| val > threshold))
}
fn raises_warning(&self) -> bool {
if self.longest_adapter > self.longest_sequence {
return true;
}
let threshold = self.limits.threshold("adapter\twarn", 5.0);
let computed = self.ensure_calculated();
computed
.enrichments
.iter()
.any(|enrichments| enrichments.iter().any(|&val| val > threshold))
}
fn ignore_filtered_sequences(&self) -> bool {
true
}
fn ignore_in_report(&self) -> bool {
self.limits.is_ignored("adapter")
}
fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
let computed = self.ensure_calculated();
write!(writer, "#Position")?;
for adapter in &self.adapters {
write!(writer, "\t{}", adapter.name)?;
}
writeln!(writer)?;
for (row, x_label) in computed.x_labels.iter().enumerate() {
write!(writer, "{}", x_label)?;
for a in 0..self.adapters.len() {
write!(
writer,
"\t{}",
java_format_double(computed.enrichments[a][row])
)?;
}
writeln!(writer)?;
}
Ok(())
}
fn chart_image_name(&self) -> Option<&str> {
Some("adapter_content")
}
fn chart_alt_text(&self) -> Option<&str> {
Some("Adapter graph")
}
fn generate_chart_svg(&self) -> Option<String> {
Some(self.build_chart_svg())
}
}