Skip to main content

fastqc_rust/modules/
n_content.rs

// Per Base N Content module
// Corresponds to Modules/NContent.java
3
4use std::io;
5
6use crate::config::{Limits, LimitsExt};
7use crate::modules::QCModule;
8use crate::report::charts::line_graph::{render_line_graph, LineGraphData};
9use crate::sequence::Sequence;
10use crate::utils::base_counts::{BASE_INDEX, IDX_N};
11use crate::utils::base_group::BaseGroup;
12use crate::utils::format::java_format_double;
13
14pub struct NContent {
15    n_counts: Vec<u64>,
16    not_n_counts: Vec<u64>,
17    nogroup: bool,
18    expgroup: bool,
19    min_length: usize,
20    limits: Limits,
21}
22
23impl NContent {
24    pub fn new(limits: &Limits, nogroup: bool, expgroup: bool, min_length: usize) -> Self {
25        NContent {
26            n_counts: Vec::new(),
27            not_n_counts: Vec::new(),
28            nogroup,
29            expgroup,
30            min_length,
31            limits: limits.clone(),
32        }
33    }
34
35    fn calculate(&self) -> NContentData {
36        let groups = BaseGroup::make_base_groups(
37            self.n_counts.len(),
38            self.min_length,
39            self.nogroup,
40            self.expgroup,
41        );
42
43        let mut x_categories = Vec::with_capacity(groups.len());
44        let mut percentages = vec![0.0f64; groups.len()];
45
46        for (i, group) in groups.iter().enumerate() {
47            x_categories.push(group.label());
48
49            let mut n_count: u64 = 0;
50            let mut total: u64 = 0;
51
52            // Java iterates `for (int bp=groups[i].lowerCount()-1;bp<groups[i].upperCount();bp++)`
53            // Our lower_count/upper_count are 0-based.
54            for bp in group.lower_count..=group.upper_count {
55                n_count += self.n_counts[bp];
56                total += self.n_counts[bp];
57                total += self.not_n_counts[bp];
58            }
59
60            // percentages[i] = 100 * (nCount / (double)total)
61            percentages[i] = 100.0 * (n_count as f64 / total as f64);
62        }
63
64        NContentData {
65            x_categories,
66            percentages,
67        }
68    }
69}
70
71impl NContent {
72    fn build_chart_svg(&self) -> String {
73        let data = self.calculate();
74
75        // minY=0, maxY=100 for percentage, matching Java's constructor
76        render_line_graph(&LineGraphData {
77            data: vec![data.percentages],
78            min_y: 0.0,
79            max_y: 100.0,
80            x_label: "Position in read (bp)".to_string(),
81            series_names: vec!["%N".to_string()],
82            x_categories: data.x_categories,
83            title: "N content across all bases".to_string(),
84        })
85    }
86}
87
88impl QCModule for NContent {
89    fn process_sequence(&mut self, sequence: &Sequence) {
90        let seq = &sequence.sequence;
91
92        // Grow arrays if needed
93        if self.n_counts.len() < seq.len() {
94            self.n_counts.resize(seq.len(), 0);
95            self.not_n_counts.resize(seq.len(), 0);
96        }
97
98        // Use lookup table to classify each byte without a multi-way match
99        for (i, &b) in seq.iter().enumerate() {
100            if BASE_INDEX[b as usize] as usize == IDX_N {
101                self.n_counts[i] += 1;
102            } else {
103                self.not_n_counts[i] += 1;
104            }
105        }
106    }
107
108    fn name(&self) -> &str {
109        "Per base N content"
110    }
111
112    fn description(&self) -> &str {
113        "Shows the percentage of bases at each position which are not being called"
114    }
115
116    fn reset(&mut self) {
117        self.n_counts.clear();
118        self.not_n_counts.clear();
119    }
120
121    fn raises_error(&self) -> bool {
122        let threshold = self.limits.threshold("n_content\terror", 20.0);
123        let data = self.calculate();
124        data.percentages.iter().any(|&p| p > threshold)
125    }
126
127    fn raises_warning(&self) -> bool {
128        let threshold = self.limits.threshold("n_content\twarn", 5.0);
129        let data = self.calculate();
130        data.percentages.iter().any(|&p| p > threshold)
131    }
132
133    fn ignore_filtered_sequences(&self) -> bool {
134        true
135    }
136
137    fn ignore_in_report(&self) -> bool {
138        self.limits.is_ignored("n_content")
139    }
140
141    fn write_text_report(&self, writer: &mut dyn io::Write) -> io::Result<()> {
142        let data = self.calculate();
143
144        // Header matches Java's makeReport
145        writeln!(writer, "#Base\tN-Count")?;
146
147        for i in 0..data.x_categories.len() {
148            writeln!(
149                writer,
150                "{}\t{}",
151                data.x_categories[i],
152                java_format_double(data.percentages[i]),
153            )?;
154        }
155
156        Ok(())
157    }
158
159    // Image filename matches Java's "per_base_n_content.png" in Images/
160    fn chart_image_name(&self) -> Option<&str> {
161        Some("per_base_n_content")
162    }
163    fn chart_alt_text(&self) -> Option<&str> {
164        Some("N content graph")
165    }
166    fn generate_chart_svg(&self) -> Option<String> {
167        Some(self.build_chart_svg())
168    }
169}
170
/// Result of grouping the per-position tallies: two parallel vectors with one
/// entry per base group.
struct NContentData {
    /// Label of each base group, as returned by the group's `label()`.
    x_categories: Vec<String>,
    /// N percentage of each base group, in the same order as `x_categories`.
    percentages: Vec<f64>,
}