fastqc_rust/modules/
per_tile_quality.rs1use std::collections::HashMap;
5use std::io;
6
7use crate::config::{Limits, LimitsExt};
8use crate::modules::QCModule;
9use crate::report::charts::tile_graph::{render_tile_graph, TileGraphData};
10use crate::sequence::Sequence;
11use crate::utils::base_group::BaseGroup;
12use crate::utils::format::java_format_double;
13use crate::utils::quality_count::QualityCount;
14use crate::utils::{phred, quality_count};
15
16pub struct PerTileQualityScores {
17 per_tile_quality_counts: HashMap<i32, Vec<QualityCount>>,
18 current_length: usize,
19 total_count: u64,
20 split_position: i32,
23 ignore_in_report: bool,
24 nogroup: bool,
25 expgroup: bool,
26 min_length: usize,
27 limits: Limits,
28}
29
30impl PerTileQualityScores {
31 pub fn new(limits: &Limits, nogroup: bool, expgroup: bool, min_length: usize) -> Self {
32 PerTileQualityScores {
33 per_tile_quality_counts: HashMap::new(),
34 current_length: 0,
35 total_count: 0,
36 split_position: -1,
37 ignore_in_report: false,
38 nogroup,
39 expgroup,
40 min_length,
41 limits: limits.clone(),
42 }
43 }
44
45 fn calculate(&self) -> Option<TileCalculatedData> {
46 if self.per_tile_quality_counts.is_empty() {
47 return None;
48 }
49
50 let all_counts: Vec<&QualityCount> = self
52 .per_tile_quality_counts
53 .values()
54 .flat_map(|v| v.iter())
55 .collect();
56 let (min_char, _max_char) = quality_count::calculate_offsets(all_counts);
57 let offset = phred::detect(min_char).map(|e| e.offset).unwrap_or(33);
59
60 let groups = BaseGroup::make_base_groups(
61 self.current_length,
62 self.min_length,
63 self.nogroup,
64 self.expgroup,
65 );
66
67 let mut tile_numbers: Vec<i32> = self.per_tile_quality_counts.keys().copied().collect();
68 tile_numbers.sort();
69
70 let mut means = vec![vec![0.0f64; groups.len()]; tile_numbers.len()];
71 let mut x_labels = Vec::with_capacity(groups.len());
72
73 for (t, &tile) in tile_numbers.iter().enumerate() {
74 for (i, group) in groups.iter().enumerate() {
75 if t == 0 {
76 x_labels.push(group.label());
77 }
78 let min_base = group.lower_count;
79 let max_base = group.upper_count;
80 means[t][i] = self.get_mean(tile, min_base, max_base, offset);
81 }
82 }
83
84 let mut average_qualities_per_group = vec![0.0f64; groups.len()];
86 for tile_means in means.iter().take(tile_numbers.len()) {
87 for (avg, &m) in average_qualities_per_group
88 .iter_mut()
89 .zip(tile_means.iter())
90 {
91 *avg += m;
92 }
93 }
94 for avg in &mut average_qualities_per_group {
95 *avg /= tile_numbers.len() as f64;
96 }
97
98 let mut max_deviation: f64 = 0.0;
99 for (i, &avg) in average_qualities_per_group.iter().enumerate() {
101 for tile_means in means.iter_mut().take(tile_numbers.len()) {
102 tile_means[i] -= avg;
103 if tile_means[i].abs() > max_deviation {
104 max_deviation = tile_means[i].abs();
105 }
106 }
107 }
108
109 Some(TileCalculatedData {
110 tiles: tile_numbers,
111 means,
112 x_labels,
113 max_deviation,
114 })
115 }
116
117 fn get_mean(&self, tile: i32, min_base: usize, max_base: usize, offset: u8) -> f64 {
119 let quality_counts = match self.per_tile_quality_counts.get(&tile) {
120 Some(qc) => qc,
121 None => return 0.0,
122 };
123
124 let mut count = 0;
125 let mut total = 0.0;
126
127 for qc in &quality_counts[min_base..=max_base] {
128 if qc.get_total_count() > 0 {
129 count += 1;
130 total += qc.get_mean(offset);
131 }
132 }
133
134 if count > 0 {
135 total / count as f64
136 } else {
137 0.0
138 }
139 }
140}
141
142impl PerTileQualityScores {
143 fn build_chart_svg(&self) -> Option<String> {
144 let data = self.calculate()?;
145
146 let color_scale_max = self.limits.threshold("tile\terror", 5.0);
148
149 Some(render_tile_graph(&TileGraphData {
150 x_labels: data.x_labels,
151 tiles: data.tiles,
152 tile_base_means: data.means,
153 color_scale_max,
154 }))
155 }
156}
157
158impl QCModule for PerTileQualityScores {
159 fn process_sequence(&mut self, sequence: &Sequence) {
160 if self.total_count == 0 && self.limits.is_ignored("tile") {
162 self.ignore_in_report = true;
163 }
164
165 if self.ignore_in_report {
166 return;
167 }
168
169 if sequence.quality.is_empty() {
171 return;
172 }
173
174 self.total_count += 1;
175
176 if self.total_count > 10000 && !self.total_count.is_multiple_of(10) {
178 return;
179 }
180
181 if self.split_position < 0 {
184 let field_count = sequence.id.split(':').count();
185 if field_count >= 7 {
186 self.split_position = 4;
188 } else if field_count >= 5 {
189 self.split_position = 2;
191 } else {
192 self.ignore_in_report = true;
194 return;
195 }
196 }
197
198 let tile = match sequence
199 .id
200 .split(':')
201 .nth(self.split_position as usize)
202 .and_then(|f| f.parse::<i32>().ok())
203 {
204 Some(t) => t,
205 None => {
206 self.ignore_in_report = true;
207 return;
208 }
209 };
210
211 let qual = &sequence.quality;
212
213 if self.current_length < qual.len() {
215 for qc_vec in self.per_tile_quality_counts.values_mut() {
216 qc_vec.resize_with(qual.len(), QualityCount::new);
217 }
218 self.current_length = qual.len();
219 }
220
221 if !self.per_tile_quality_counts.contains_key(&tile) {
223 if self.per_tile_quality_counts.len() > 2500 {
224 eprintln!("Too many tiles (>2500) so giving up trying to do per-tile qualities since we're probably parsing the file wrongly");
226 self.ignore_in_report = true;
227 self.per_tile_quality_counts.clear();
228 return;
229 }
230
231 let mut quality_counts = Vec::new();
232 quality_counts.resize_with(self.current_length, QualityCount::new);
233 self.per_tile_quality_counts.insert(tile, quality_counts);
234 }
235
236 let quality_counts = self.per_tile_quality_counts.get_mut(&tile).unwrap();
237
238 for (i, &q) in qual.iter().enumerate() {
239 quality_counts[i].add_value(q);
240 }
241 }
242
243 fn name(&self) -> &str {
244 "Per tile sequence quality"
245 }
246
247 fn description(&self) -> &str {
248 "Shows the per tile Quality scores of all bases at a given position in a sequencing run"
249 }
250
251 fn reset(&mut self) {
252 self.total_count = 0;
253 self.per_tile_quality_counts.clear();
254 self.current_length = 0;
255 self.split_position = -1;
256 self.ignore_in_report = false;
257 }
258
259 fn raises_error(&self) -> bool {
260 let threshold = self.limits.threshold("tile\terror", 5.0);
261 self.calculate()
262 .is_some_and(|data| data.max_deviation > threshold)
263 }
264
265 fn raises_warning(&self) -> bool {
266 let threshold = self.limits.threshold("tile\twarn", 2.0);
267 self.calculate()
268 .is_some_and(|data| data.max_deviation > threshold)
269 }
270
271 fn ignore_filtered_sequences(&self) -> bool {
272 true
273 }
274
275 fn ignore_in_report(&self) -> bool {
276 self.ignore_in_report || self.limits.is_ignored("tile") || self.current_length == 0
278 }
279
280 fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
281 let data = match self.calculate() {
282 Some(d) => d,
283 None => return Ok(()),
284 };
285
286 writeln!(writer, "#Tile\tBase\tMean")?;
288
289 for (t, &tile) in data.tiles.iter().enumerate() {
290 for i in 0..data.means[t].len() {
291 writeln!(
292 writer,
293 "{}\t{}\t{}",
294 tile,
295 data.x_labels[i],
296 java_format_double(data.means[t][i]),
297 )?;
298 }
299 }
300
301 Ok(())
302 }
303
304 fn chart_image_name(&self) -> Option<&str> {
306 Some("per_tile_quality")
307 }
308 fn chart_alt_text(&self) -> Option<&str> {
309 Some("Per tile sequence quality")
310 }
311 fn generate_chart_svg(&self) -> Option<String> {
312 self.build_chart_svg()
313 }
314}
315
316struct TileCalculatedData {
317 tiles: Vec<i32>,
318 means: Vec<Vec<f64>>,
319 x_labels: Vec<String>,
320 max_deviation: f64,
321}