fastqc_rust/modules/
sequence_length_distribution.rs1use std::io;
5
6use crate::config::{Limits, LimitsExt};
7use crate::modules::QCModule;
8use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
9use crate::sequence::Sequence;
10use crate::utils::format::java_format_double;
11
12pub struct SequenceLengthDistribution {
13 length_counts: Vec<u64>,
15 limits: Limits,
16 nogroup: bool,
17 computed: Option<ComputedDistribution>,
19}
20
21struct ComputedDistribution {
22 graph_counts: Vec<f64>,
23 x_categories: Vec<String>,
24}
25
26impl SequenceLengthDistribution {
27 pub fn new(limits: &Limits, nogroup: bool) -> Self {
28 SequenceLengthDistribution {
29 length_counts: Vec::new(),
30 limits: limits.clone(),
31 nogroup,
32 computed: None,
33 }
34 }
35
36 fn get_size_distribution(min: usize, max: usize, nogroup: bool) -> (usize, usize) {
39 if nogroup {
41 return (min, 1);
42 }
43
44 let mut base: usize = 1;
46
47 let divisions = [1, 2, 5];
52 let interval;
53
54 'outer: loop {
55 for &d in &divisions {
56 let tester = base * d;
57 if (max - min) / tester <= 50 {
58 interval = tester;
59 break 'outer;
60 }
61 }
62 base *= 10;
63 }
64
65 let basic_division = min / interval;
67 let starting = basic_division * interval;
68
69 (starting, interval)
70 }
71
72 fn calculate_distribution(&mut self) {
73 if self.computed.is_some() {
74 return;
75 }
76
77 let mut max_len: usize = 0;
78 let mut min_len: Option<usize> = None;
79
80 for i in 0..self.length_counts.len() {
82 if self.length_counts[i] > 0 {
83 if min_len.is_none() {
84 min_len = Some(i);
85 }
86 max_len = i;
87 }
88 }
89
90 let mut min_len = min_len.unwrap_or(0);
92
93 min_len = min_len.saturating_sub(1);
95 max_len += 1;
96
97 let (starting, interval) = Self::get_size_distribution(min_len, max_len, self.nogroup);
98
99 let mut categories = 0;
101 let mut current_value = starting;
102 while current_value <= max_len {
103 categories += 1;
104 current_value += interval;
105 }
106
107 let mut graph_counts = vec![0.0f64; categories];
108 let mut x_categories = Vec::with_capacity(categories);
109
110 for (i, gc) in graph_counts.iter_mut().enumerate() {
112 let min_value = starting + (interval * i);
113 let mut max_value = (starting + (interval * (i + 1))) - 1;
114
115 if max_value > max_len {
117 max_value = max_len;
118 }
119
120 for bp in min_value..=max_value {
122 if bp < self.length_counts.len() {
123 *gc += self.length_counts[bp] as f64;
124 }
125 }
126
127 if interval == 1 {
129 x_categories.push(format!("{}", min_value));
130 } else {
131 x_categories.push(format!("{}-{}", min_value, max_value));
132 }
133 }
134
135 self.computed = Some(ComputedDistribution {
136 graph_counts,
137 x_categories,
138 });
139 }
140
141 fn ensure_calculated(&self) -> &ComputedDistribution {
142 static DEFAULT: ComputedDistribution = ComputedDistribution {
145 graph_counts: Vec::new(),
146 x_categories: Vec::new(),
147 };
148 self.computed.as_ref().unwrap_or(&DEFAULT)
149 }
150}
151
152impl SequenceLengthDistribution {
153 fn build_chart_svg(&self) -> String {
154 let computed = self.ensure_calculated();
155 let max_val = computed
156 .graph_counts
157 .iter()
158 .cloned()
159 .fold(0.0_f64, f64::max);
160 let max_y = max_val;
162
163 render_line_graph(&LineGraphData {
165 data: vec![computed.graph_counts.clone()],
166 min_y: 0.0,
167 max_y,
168 x_label: "Sequence Length (bp)".to_string(),
169 series_names: vec!["Sequence Length".to_string()],
170 x_categories: computed.x_categories.clone(),
171 title: "Distribution of sequence lengths over all sequences".to_string(),
172 })
173 }
174}
175
176impl QCModule for SequenceLengthDistribution {
177 fn process_sequence(&mut self, sequence: &Sequence) {
178 self.computed = None;
179 let seq_len = sequence.sequence.len();
180
181 if seq_len + 2 > self.length_counts.len() {
183 self.length_counts.resize(seq_len + 2, 0);
184 }
185
186 self.length_counts[seq_len] += 1;
187 }
188
189 fn name(&self) -> &str {
190 "Sequence Length Distribution"
191 }
192
193 fn description(&self) -> &str {
194 "Shows the distribution of sequence length over all sequences"
195 }
196
197 fn reset(&mut self) {
198 self.length_counts.clear();
199 self.computed = None;
200 }
201
202 fn finalize(&mut self) {
203 self.calculate_distribution();
204 }
205
206 fn raises_error(&self) -> bool {
207 let threshold = self.limits.threshold("sequence_length\terror", 1.0);
209 if threshold == 0.0 {
210 return false;
211 }
212
213 if !self.length_counts.is_empty() && self.length_counts[0] > 0 {
215 return true;
216 }
217 false
218 }
219
220 fn raises_warning(&self) -> bool {
221 let threshold = self.limits.threshold("sequence_length\twarn", 1.0);
223 if threshold == 0.0 {
224 return false;
225 }
226
227 let mut seen_length = false;
229 for &count in &self.length_counts {
230 if count > 0 {
231 if seen_length {
232 return true;
233 }
234 seen_length = true;
235 }
236 }
237 false
238 }
239
240 fn ignore_filtered_sequences(&self) -> bool {
241 true
242 }
243
244 fn ignore_in_report(&self) -> bool {
245 self.limits.is_ignored("sequence_length")
246 }
247
248 fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
249 let computed = self.ensure_calculated();
250
251 writeln!(writer, "#Length\tCount")?;
252 for i in 0..computed.x_categories.len() {
253 if (i == 0 || i == computed.x_categories.len() - 1) && computed.graph_counts[i] == 0.0 {
255 continue;
256 }
257 writeln!(
258 writer,
259 "{}\t{}",
260 computed.x_categories[i],
261 java_format_double(computed.graph_counts[i])
262 )?;
263 }
264 Ok(())
265 }
266
267 fn chart_image_name(&self) -> Option<&str> {
269 Some("sequence_length_distribution")
270 }
271 fn chart_alt_text(&self) -> Option<&str> {
272 Some("Sequence length distribution")
273 }
274 fn generate_chart_svg(&self) -> Option<String> {
275 Some(self.build_chart_svg())
276 }
277}