fastqc_rust/modules/
n_content.rs1use std::io;
5
6use crate::config::{Limits, LimitsExt};
7use crate::modules::QCModule;
8use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
9use crate::sequence::Sequence;
10use crate::utils::base_counts::{BASE_INDEX, IDX_N};
11use crate::utils::base_group::BaseGroup;
12use crate::utils::format::java_format_double;
13
/// Accumulator behind the "Per base N content" QC module.
///
/// Keeps two parallel per-position tallies (bases classified as `N` and bases
/// classified as anything else) plus the grouping options and limits that
/// were supplied at construction time.
pub struct NContent {
    /// Per-position count of bases whose `BASE_INDEX` entry equals `IDX_N`.
    n_counts: Vec<u64>,
    /// Per-position count of every other base; always resized together with
    /// `n_counts`, so both vectors have the same length.
    not_n_counts: Vec<u64>,
    /// Forwarded unchanged to `BaseGroup::make_base_groups`.
    nogroup: bool,
    /// Forwarded unchanged to `BaseGroup::make_base_groups`.
    expgroup: bool,
    /// Forwarded unchanged to `BaseGroup::make_base_groups`.
    min_length: usize,
    /// Owned copy of the limits, consulted for the warn/error thresholds and
    /// for the "ignored" flag.
    limits: Limits,
}
22
23impl NContent {
24 pub fn new(limits: &Limits, nogroup: bool, expgroup: bool, min_length: usize) -> Self {
25 NContent {
26 n_counts: Vec::new(),
27 not_n_counts: Vec::new(),
28 nogroup,
29 expgroup,
30 min_length,
31 limits: limits.clone(),
32 }
33 }
34
35 fn calculate(&self) -> NContentData {
36 let groups = BaseGroup::make_base_groups(
37 self.n_counts.len(),
38 self.min_length,
39 self.nogroup,
40 self.expgroup,
41 );
42
43 let mut x_categories = Vec::with_capacity(groups.len());
44 let mut percentages = vec![0.0f64; groups.len()];
45
46 for (i, group) in groups.iter().enumerate() {
47 x_categories.push(group.label());
48
49 let mut n_count: u64 = 0;
50 let mut total: u64 = 0;
51
52 for bp in group.lower_count..=group.upper_count {
55 n_count += self.n_counts[bp];
56 total += self.n_counts[bp];
57 total += self.not_n_counts[bp];
58 }
59
60 percentages[i] = 100.0 * (n_count as f64 / total as f64);
62 }
63
64 NContentData {
65 x_categories,
66 percentages,
67 }
68 }
69}
70
71impl NContent {
72 fn build_chart_svg(&self) -> String {
73 let data = self.calculate();
74
75 render_line_graph(&LineGraphData {
77 data: vec![data.percentages],
78 min_y: 0.0,
79 max_y: 100.0,
80 x_label: "Position in read (bp)".to_string(),
81 series_names: vec!["%N".to_string()],
82 x_categories: data.x_categories,
83 title: "N content across all bases".to_string(),
84 })
85 }
86}
87
88impl QCModule for NContent {
89 fn process_sequence(&mut self, sequence: &Sequence) {
90 let seq = &sequence.sequence;
91
92 if self.n_counts.len() < seq.len() {
94 self.n_counts.resize(seq.len(), 0);
95 self.not_n_counts.resize(seq.len(), 0);
96 }
97
98 for (i, &b) in seq.iter().enumerate() {
100 if BASE_INDEX[b as usize] as usize == IDX_N {
101 self.n_counts[i] += 1;
102 } else {
103 self.not_n_counts[i] += 1;
104 }
105 }
106 }
107
108 fn name(&self) -> &str {
109 "Per base N content"
110 }
111
112 fn description(&self) -> &str {
113 "Shows the percentage of bases at each position which are not being called"
114 }
115
116 fn reset(&mut self) {
117 self.n_counts.clear();
118 self.not_n_counts.clear();
119 }
120
121 fn raises_error(&self) -> bool {
122 let threshold = self.limits.threshold("n_content\terror", 20.0);
123 let data = self.calculate();
124 data.percentages.iter().any(|&p| p > threshold)
125 }
126
127 fn raises_warning(&self) -> bool {
128 let threshold = self.limits.threshold("n_content\twarn", 5.0);
129 let data = self.calculate();
130 data.percentages.iter().any(|&p| p > threshold)
131 }
132
133 fn ignore_filtered_sequences(&self) -> bool {
134 true
135 }
136
137 fn ignore_in_report(&self) -> bool {
138 self.limits.is_ignored("n_content")
139 }
140
141 fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
142 let data = self.calculate();
143
144 writeln!(writer, "#Base\tN-Count")?;
146
147 for i in 0..data.x_categories.len() {
148 writeln!(
149 writer,
150 "{}\t{}",
151 data.x_categories[i],
152 java_format_double(data.percentages[i]),
153 )?;
154 }
155
156 Ok(())
157 }
158
159 fn chart_image_name(&self) -> Option<&str> {
161 Some("per_base_n_content")
162 }
163 fn chart_alt_text(&self) -> Option<&str> {
164 Some("N content graph")
165 }
166 fn generate_chart_svg(&self) -> Option<String> {
167 Some(self.build_chart_svg())
168 }
169}
170
/// Grouped result of `NContent::calculate`.
///
/// The two vectors are parallel: entry `i` of `percentages` belongs to the
/// label at entry `i` of `x_categories`.
struct NContentData {
    /// One label per base group, as returned by `BaseGroup::label`.
    x_categories: Vec<String>,
    /// `100 * N / (N + not-N)` for each base group.
    percentages: Vec<f64>,
}