fastqc_rust/modules/
per_base_sequence_content.rs1use std::io;
5
6use crate::config::{Limits, LimitsExt};
7use crate::modules::QCModule;
8use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
9use crate::sequence::Sequence;
10use crate::utils::base_counts::{BASE_INDEX, IDX_A, IDX_C, IDX_G, IDX_T};
11use crate::utils::base_group::BaseGroup;
12use crate::utils::format::java_format_double;
13
14pub struct PerBaseSequenceContent {
15 counts: Vec<[u64; 4]>,
19 nogroup: bool,
20 expgroup: bool,
21 min_length: usize,
22 limits: Limits,
23}
24
25impl PerBaseSequenceContent {
26 pub fn new(limits: &Limits, nogroup: bool, expgroup: bool, min_length: usize) -> Self {
27 PerBaseSequenceContent {
28 counts: Vec::new(),
29 nogroup,
30 expgroup,
31 min_length,
32 limits: limits.clone(),
33 }
34 }
35
36 fn calculate(&self) -> ContentData {
37 let groups = BaseGroup::make_base_groups(
38 self.counts.len(),
39 self.min_length,
40 self.nogroup,
41 self.expgroup,
42 );
43
44 let mut x_categories = Vec::with_capacity(groups.len());
45 let mut g_percent = vec![0.0f64; groups.len()];
46 let mut a_percent = vec![0.0f64; groups.len()];
47 let mut t_percent = vec![0.0f64; groups.len()];
48 let mut c_percent = vec![0.0f64; groups.len()];
49
50 for (i, group) in groups.iter().enumerate() {
51 x_categories.push(group.label());
52
53 let mut a_count: u64 = 0;
54 let mut c_count: u64 = 0;
55 let mut g_count: u64 = 0;
56 let mut t_count: u64 = 0;
57 let mut total: u64 = 0;
58
59 for bp in group.lower_count..=group.upper_count {
63 let c = &self.counts[bp];
64 a_count += c[IDX_A];
65 c_count += c[IDX_C];
66 g_count += c[IDX_G];
67 t_count += c[IDX_T];
68 total += c[IDX_A] + c[IDX_C] + c[IDX_G] + c[IDX_T];
69 }
70
71 g_percent[i] = (g_count as f64 / total as f64) * 100.0;
72 a_percent[i] = (a_count as f64 / total as f64) * 100.0;
73 t_percent[i] = (t_count as f64 / total as f64) * 100.0;
74 c_percent[i] = (c_count as f64 / total as f64) * 100.0;
75 }
76
77 ContentData {
79 x_categories,
80 t_percent,
81 c_percent,
82 a_percent,
83 g_percent,
84 }
85 }
86}
87
88impl PerBaseSequenceContent {
89 fn build_chart_svg(&self) -> String {
90 let data = self.calculate();
91
92 render_line_graph(&LineGraphData {
95 data: vec![
96 data.t_percent,
97 data.c_percent,
98 data.a_percent,
99 data.g_percent,
100 ],
101 min_y: 0.0,
102 max_y: 100.0,
103 x_label: "Position in read (bp)".to_string(),
104 series_names: vec![
105 "%T".to_string(),
106 "%C".to_string(),
107 "%A".to_string(),
108 "%G".to_string(),
109 ],
110 x_categories: data.x_categories,
111 title: "Sequence content across all bases".to_string(),
112 })
113 }
114}
115
116impl QCModule for PerBaseSequenceContent {
117 fn process_sequence(&mut self, sequence: &Sequence) {
118 let seq = &sequence.sequence;
119
120 if self.counts.len() < seq.len() {
122 self.counts.resize(seq.len(), [0; 4]);
123 }
124
125 for (i, &b) in seq.iter().enumerate() {
129 let idx = BASE_INDEX[b as usize] as usize;
130 if idx < 4 {
131 self.counts[i][idx] += 1;
132 }
133 }
134 }
135
136 fn name(&self) -> &str {
137 "Per base sequence content"
138 }
139
140 fn description(&self) -> &str {
141 "Shows the relative amounts of each base at each position in a sequencing run"
142 }
143
144 fn reset(&mut self) {
145 self.counts.clear();
146 }
147
148 fn raises_error(&self) -> bool {
149 let error_threshold = self.limits.threshold("sequence\terror", 20.0);
150 let data = self.calculate();
151
152 for i in 0..data.g_percent.len() {
154 let gc_diff = (data.c_percent[i] - data.g_percent[i]).abs();
155 let at_diff = (data.t_percent[i] - data.a_percent[i]).abs();
156
157 if gc_diff > error_threshold || at_diff > error_threshold {
158 return true;
159 }
160 }
161 false
162 }
163
164 fn raises_warning(&self) -> bool {
165 let warn_threshold = self.limits.threshold("sequence\twarn", 10.0);
166 let data = self.calculate();
167
168 for i in 0..data.g_percent.len() {
169 let gc_diff = (data.c_percent[i] - data.g_percent[i]).abs();
170 let at_diff = (data.t_percent[i] - data.a_percent[i]).abs();
171
172 if gc_diff > warn_threshold || at_diff > warn_threshold {
173 return true;
174 }
175 }
176 false
177 }
178
179 fn ignore_filtered_sequences(&self) -> bool {
180 true
181 }
182
183 fn ignore_in_report(&self) -> bool {
184 self.limits.is_ignored("sequence")
185 }
186
187 fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
188 let data = self.calculate();
189
190 writeln!(writer, "#Base\tG\tA\tT\tC")?;
192
193 for i in 0..data.x_categories.len() {
194 writeln!(
196 writer,
197 "{}\t{}\t{}\t{}\t{}",
198 data.x_categories[i],
199 java_format_double(data.g_percent[i]),
200 java_format_double(data.a_percent[i]),
201 java_format_double(data.t_percent[i]),
202 java_format_double(data.c_percent[i]),
203 )?;
204 }
205
206 Ok(())
207 }
208
209 fn chart_image_name(&self) -> Option<&str> {
211 Some("per_base_sequence_content")
212 }
213 fn chart_alt_text(&self) -> Option<&str> {
214 Some("Per base sequence content")
215 }
216 fn generate_chart_svg(&self) -> Option<String> {
217 Some(self.build_chart_svg())
218 }
219}
220
221struct ContentData {
222 x_categories: Vec<String>,
223 t_percent: Vec<f64>,
224 c_percent: Vec<f64>,
225 a_percent: Vec<f64>,
226 g_percent: Vec<f64>,
227}