Skip to main content

fastqc_rust/modules/
adapter_content.rs

1// Adapter Content module
2// Corresponds to Modules/AdapterContent.java
3
4use std::io;
5
6use memchr::memmem;
7
8use crate::config::{Limits, LimitsExt};
9use crate::modules::QCModule;
10use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
11use crate::sequence::Sequence;
12use crate::utils::base_group::BaseGroup;
13use crate::utils::format::java_format_double;
14
15/// A single adapter to search for in sequences.
16struct Adapter {
17    name: String,
18    /// Pre-compiled substring searcher for the adapter sequence.
19    /// Using memchr::memmem::Finder avoids re-compiling the searcher on every call
20    /// (str::find creates a new TwoWaySearcher per invocation, which was 60% of total runtime).
21    finder: memmem::Finder<'static>,
22    /// positions[i] = cumulative count of sequences with adapter found at or before position i.
23    positions: Vec<u64>,
24}
25
26impl Adapter {
27    fn new(name: &str, sequence: &str) -> Self {
28        let seq_bytes = sequence.as_bytes().to_vec();
29        let finder = memmem::Finder::new(&seq_bytes).into_owned();
30        Adapter {
31            name: name.to_string(),
32            finder,
33            positions: vec![0; 1],
34        }
35    }
36
37    /// Expand positions array to new_length, copying the last value
38    /// to newly added slots (to propagate cumulative counts).
39    fn expand_length_to(&mut self, new_length: usize) {
40        let old_len = self.positions.len();
41        if new_length > old_len {
42            let last_val = if old_len > 0 {
43                self.positions[old_len - 1]
44            } else {
45                0
46            };
47            self.positions.resize(new_length, last_val);
48        }
49    }
50
51    fn increment_count(&mut self, position: usize) {
52        self.positions[position] += 1;
53    }
54}
55
56pub struct AdapterContent {
57    adapters: Vec<Adapter>,
58    longest_sequence: usize,
59    longest_adapter: usize,
60    total_count: u64,
61    limits: Limits,
62    nogroup: bool,
63    expgroup: bool,
64    min_length: usize,
65    // Lazily computed
66    computed: Option<ComputedEnrichment>,
67}
68
69struct ComputedEnrichment {
70    enrichments: Vec<Vec<f64>>,
71    x_labels: Vec<String>,
72}
73
74impl AdapterContent {
75    pub fn new(
76        limits: &Limits,
77        adapter_entries: &[(String, String)],
78        nogroup: bool,
79        expgroup: bool,
80        min_length: usize,
81    ) -> Self {
82        let mut longest_adapter = 0;
83        let mut adapters = Vec::with_capacity(adapter_entries.len());
84
85        for (name, seq) in adapter_entries {
86            if seq.len() > longest_adapter {
87                longest_adapter = seq.len();
88            }
89            adapters.push(Adapter::new(name, seq));
90        }
91
92        AdapterContent {
93            adapters,
94            longest_sequence: 0,
95            longest_adapter,
96            total_count: 0,
97            limits: limits.clone(),
98            nogroup,
99            expgroup,
100            min_length,
101            computed: None,
102        }
103    }
104
105    /// Replicates calculateEnrichment() from AdapterContent.java.
106    fn calculate_enrichment(&mut self) {
107        if self.computed.is_some() {
108            return;
109        }
110
111        let mut max_length = 0;
112        for adapter in &self.adapters {
113            if adapter.positions.len() > max_length {
114                max_length = adapter.positions.len();
115            }
116        }
117
118        // Group positions using BaseGroup
119        let groups =
120            BaseGroup::make_base_groups(max_length, self.min_length, self.nogroup, self.expgroup);
121
122        let x_labels: Vec<String> = groups.iter().map(|g| g.label()).collect();
123
124        let mut enrichments = vec![vec![0.0f64; groups.len()]; self.adapters.len()];
125
126        for (a, adapter) in self.adapters.iter().enumerate() {
127            let positions = &adapter.positions;
128
129            for (g, group) in groups.iter().enumerate() {
130                // lowerCount() is 1-based in Java, we use 0-based internally
131                // Java: p=groups[g].lowerCount()-1; p<groups[g].upperCount()
132                let lower = group.lower_count; // already 0-based
133                let upper = group.upper_count; // already 0-based, inclusive
134
135                for p in lower..=upper {
136                    if p < positions.len() {
137                        enrichments[a][g] +=
138                            (positions[p] as f64 * 100.0) / self.total_count as f64;
139                    }
140                }
141
142                // Average over the group width
143                enrichments[a][g] /= (upper - lower + 1) as f64;
144            }
145        }
146
147        self.computed = Some(ComputedEnrichment {
148            enrichments,
149            x_labels,
150        });
151    }
152
153    /// Derive adapter names from the adapters Vec (avoids storing a redundant copy).
154    fn adapter_names(&self) -> Vec<String> {
155        self.adapters.iter().map(|a| a.name.clone()).collect()
156    }
157
158    fn ensure_calculated(&self) -> &ComputedEnrichment {
159        static DEFAULT: ComputedEnrichment = ComputedEnrichment {
160            enrichments: Vec::new(),
161            x_labels: Vec::new(),
162        };
163        self.computed.as_ref().unwrap_or(&DEFAULT)
164    }
165}
166
167impl AdapterContent {
168    fn build_chart_svg(&self) -> String {
169        let computed = self.ensure_calculated();
170
171        // Matches Java's `new LineGraph(enrichments, 0, 100, "Position in read (bp)", labels, xLabels, "% Adapter")`
172        render_line_graph(&LineGraphData {
173            data: computed.enrichments.clone(),
174            min_y: 0.0,
175            max_y: 100.0,
176            x_label: "Position in read (bp)".to_string(),
177            series_names: self.adapter_names(),
178            x_categories: computed.x_labels.clone(),
179            title: "% Adapter".to_string(),
180        })
181    }
182}
183
184impl QCModule for AdapterContent {
185    fn process_sequence(&mut self, sequence: &Sequence) {
186        self.computed = None;
187        self.total_count += 1;
188
189        let seq_len = sequence.sequence.len();
190
191        // Expand adapter positions if sequence is longer than before
192        // AND the last matchable position is positive
193        if seq_len > self.longest_sequence && seq_len > self.longest_adapter {
194            self.longest_sequence = seq_len;
195            let new_len = (self.longest_sequence - self.longest_adapter) + 1;
196            for adapter in &mut self.adapters {
197                adapter.expand_length_to(new_len);
198            }
199        }
200
201        // Search for each adapter in the sequence using pre-compiled searchers.
202        // Uses memchr::memmem::Finder on raw bytes — avoids UTF-8 conversion and
203        // re-compiling the search pattern on every call (was 60% of total runtime with str::find).
204        let seq_bytes = &sequence.sequence;
205        let max_pos = self.longest_sequence.saturating_sub(self.longest_adapter);
206
207        for adapter in &mut self.adapters {
208            if let Some(index) = adapter.finder.find(seq_bytes) {
209                // Once found at position index, increment all positions
210                // from index through longestSequence-longestAdapter
211                for i in index..=max_pos {
212                    if i < adapter.positions.len() {
213                        adapter.increment_count(i);
214                    }
215                }
216            }
217        }
218    }
219
220    fn finalize(&mut self) {
221        self.calculate_enrichment();
222    }
223
224    fn name(&self) -> &str {
225        "Adapter Content"
226    }
227
228    fn description(&self) -> &str {
229        "Searches for specific adapter sequences in a library"
230    }
231
232    fn reset(&mut self) {
233        self.total_count = 0;
234        self.longest_sequence = 0;
235        self.computed = None;
236        for adapter in &mut self.adapters {
237            adapter.positions = vec![0; 1];
238        }
239    }
240
241    fn raises_error(&self) -> bool {
242        let threshold = self.limits.threshold("adapter\terror", 10.0);
243        let computed = self.ensure_calculated();
244        computed
245            .enrichments
246            .iter()
247            .any(|enrichments| enrichments.iter().any(|&val| val > threshold))
248    }
249
250    fn raises_warning(&self) -> bool {
251        // Warn if adapters are longer than sequences
252        if self.longest_adapter > self.longest_sequence {
253            return true;
254        }
255
256        let threshold = self.limits.threshold("adapter\twarn", 5.0);
257        let computed = self.ensure_calculated();
258        computed
259            .enrichments
260            .iter()
261            .any(|enrichments| enrichments.iter().any(|&val| val > threshold))
262    }
263
264    fn ignore_filtered_sequences(&self) -> bool {
265        true
266    }
267
268    fn ignore_in_report(&self) -> bool {
269        self.limits.is_ignored("adapter")
270    }
271
272    fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
273        let computed = self.ensure_calculated();
274
275        // Header line with Position tab and all adapter names
276        write!(writer, "#Position")?;
277        for adapter in &self.adapters {
278            write!(writer, "\t{}", adapter.name)?;
279        }
280        writeln!(writer)?;
281
282        // One row per base group, columns are Position then each adapter's enrichment
283        for (row, x_label) in computed.x_labels.iter().enumerate() {
284            write!(writer, "{}", x_label)?;
285            for a in 0..self.adapters.len() {
286                write!(
287                    writer,
288                    "\t{}",
289                    java_format_double(computed.enrichments[a][row])
290                )?;
291            }
292            writeln!(writer)?;
293        }
294
295        Ok(())
296    }
297
298    // Image filename matches Java's "adapter_content.png" in Images/
299    fn chart_image_name(&self) -> Option<&str> {
300        Some("adapter_content")
301    }
302    fn chart_alt_text(&self) -> Option<&str> {
303        Some("Adapter graph")
304    }
305    fn generate_chart_svg(&self) -> Option<String> {
306        Some(self.build_chart_svg())
307    }
308}