Skip to main content

fastqc_rust/utils/
base_group.rs

/// One bin of read positions used by the read-level plots.
///
/// Follows the logic of `Graphs/BaseGroup.java`: positions near the start of a
/// read are reported one by one, while later positions are merged into wider
/// bins so that overall trends stay visible without overwhelming the output.
///
/// Both bounds are 0-based and inclusive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BaseGroup {
    /// 0-based first position covered by this group (inclusive).
    pub lower_count: usize,
    /// 0-based last position covered by this group (inclusive).
    pub upper_count: usize,
}
12
13impl BaseGroup {
14    /// Human-readable label for this group.
15    ///
16    /// Java's `BaseGroup.toString()` uses 1-based positions.
17    /// A single-position group prints as e.g. "1", a range as "10-14".
18    pub fn label(&self) -> String {
19        let lower = self.lower_count + 1;
20        let upper = self.upper_count + 1;
21        if lower == upper {
22            format!("{}", lower)
23        } else {
24            format!("{}-{}", lower, upper)
25        }
26    }
27
28    /// Build the set of base groups for a given maximum read length.
29    ///
30    /// Replicates `BaseGroup.makeBaseGroups(int)`. The Java code
31    /// works in 1-based coordinates internally and we convert to 0-based here.
32    /// If `min_length` (from config) exceeds `max_length`, groups are extended
33    /// to cover `min_length` positions, matching Java's BaseGroup.java behavior.
34    pub fn make_base_groups(
35        max_length: usize,
36        min_length: usize,
37        nogroup: bool,
38        expgroup: bool,
39    ) -> Vec<BaseGroup> {
40        let effective_length = max_length.max(min_length);
41        if nogroup {
42            make_ungrouped_groups(effective_length)
43        } else if expgroup {
44            make_exponential_base_groups(effective_length)
45        } else {
46            make_linear_base_groups(effective_length)
47        }
48    }
49}
50
51/// Replicates `makeUngroupedGroups`. Java uses 1-based coordinates;
52/// we produce 0-based groups. Each position gets its own group.
53fn make_ungrouped_groups(max_length: usize) -> Vec<BaseGroup> {
54    (0..max_length)
55        .map(|i| BaseGroup {
56            lower_count: i,
57            upper_count: i,
58        })
59        .collect()
60}
61
62/// Replicates `makeExponentialBaseGroups` exactly. The interval
63/// increases at specific thresholds (positions 10, 50, 100, 500, 1000 in
64/// 1-based coordinates) depending on max_length.
65fn make_exponential_base_groups(max_length: usize) -> Vec<BaseGroup> {
66    let mut groups = Vec::new();
67    // Java works in 1-based coordinates throughout this method.
68    let mut starting_base: usize = 1;
69    let mut interval: usize = 1;
70
71    while starting_base <= max_length {
72        let mut end_base = starting_base + interval - 1;
73        if end_base > max_length {
74            end_base = max_length;
75        }
76
77        groups.push(BaseGroup {
78            lower_count: starting_base - 1, // convert to 0-based
79            upper_count: end_base - 1,
80        });
81
82        starting_base += interval;
83
84        // These thresholds are checked after incrementing starting_base,
85        // matching the Java code exactly.
86        if starting_base == 10 && max_length > 75 {
87            interval = 5;
88        }
89        if starting_base == 50 && max_length > 200 {
90            interval = 10;
91        }
92        if starting_base == 100 && max_length > 300 {
93            interval = 50;
94        }
95        if starting_base == 500 && max_length > 1000 {
96            interval = 100;
97        }
98        if starting_base == 1000 && max_length > 2000 {
99            interval = 500;
100        }
101    }
102
103    groups
104}
105
/// Counterpart of Java's `getLinearInterval`: picks the bin width used by
/// linear grouping.
///
/// The first 9 positions are always reported individually. Candidate widths
/// are tried in increasing order from the set `[2, 5, 10] * 10^n`, and the
/// first one for which `9 + ceil((length - 9) / width)` is below 75 is
/// returned.
///
/// Lengths below 9 have nothing left to group after the individual positions;
/// the remainder is treated as 0 (instead of underflowing the unsigned
/// subtraction), so the smallest width, 2, is returned.
///
/// # Panics
///
/// Panics if no candidate width with a multiplier below 10,000,000 keeps the
/// group count under 75.
fn get_linear_interval(length: usize) -> usize {
    const BASE_VALUES: [usize; 3] = [2, 5, 10];
    // The search gives up once the multiplier reaches this value.
    const MULTIPLIER_LIMIT: usize = 10_000_000;
    // Positions reported one by one before grouping starts.
    const INDIVIDUAL_POSITIONS: usize = 9;
    const MAX_GROUPS: usize = 75;

    // `length - 9` on `usize` would underflow for short reads.
    let remainder = length.saturating_sub(INDIVIDUAL_POSITIONS);
    let mut multiplier: usize = 1;

    while multiplier < MULTIPLIER_LIMIT {
        for &base in &BASE_VALUES {
            let interval = base * multiplier;
            // A partially filled trailing bin still counts as a group.
            let group_count = INDIVIDUAL_POSITIONS + remainder.div_ceil(interval);
            if group_count < MAX_GROUPS {
                return interval;
            }
        }
        multiplier *= 10;
    }

    panic!(
        "Couldn't find a sensible interval grouping for length '{}'",
        length
    );
}
135
136/// Replicates `makeLinearBaseGroups`. For lengths <= 75 returns
137/// ungrouped. Otherwise first 9 positions are individual, then groups of a
138/// calculated interval. The special case where `starting_base == 10` and
139/// `interval > 10` adjusts the first grouped bin to align to the interval
140/// boundary, exactly matching the Java logic.
141fn make_linear_base_groups(max_length: usize) -> Vec<BaseGroup> {
142    if max_length <= 75 {
143        return make_ungrouped_groups(max_length);
144    }
145
146    let interval = get_linear_interval(max_length);
147    let mut groups = Vec::new();
148    // Java works in 1-based coordinates.
149    let mut starting_base: usize = 1;
150
151    while starting_base <= max_length {
152        let mut end_base = starting_base + interval - 1;
153
154        // First 9 positions (1-based 1..9) are individual.
155        if starting_base < 10 {
156            end_base = starting_base;
157        }
158
159        // When the interval is larger than 10, the first grouped
160        // bin after the individual positions extends to (interval - 1) so it
161        // aligns with subsequent interval boundaries.
162        if starting_base == 10 && interval > 10 {
163            end_base = interval - 1;
164        }
165
166        if end_base > max_length {
167            end_base = max_length;
168        }
169
170        groups.push(BaseGroup {
171            lower_count: starting_base - 1,
172            upper_count: end_base - 1,
173        });
174
175        if starting_base < 10 {
176            starting_base += 1;
177        } else if starting_base == 10 && interval > 10 {
178            // Jump to the interval boundary.
179            starting_base = interval;
180        } else {
181            starting_base += interval;
182        }
183    }
184
185    groups
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a group with the given 0-based inclusive bounds.
    fn group(lower_count: usize, upper_count: usize) -> BaseGroup {
        BaseGroup {
            lower_count,
            upper_count,
        }
    }

    #[test]
    fn test_label_single() {
        assert_eq!(group(0, 0).label(), "1");
    }

    #[test]
    fn test_label_range() {
        assert_eq!(group(9, 13).label(), "10-14");
    }

    #[test]
    fn test_ungrouped_10() {
        let groups = BaseGroup::make_base_groups(10, 0, true, false);
        assert_eq!(groups.len(), 10);
        assert_eq!(groups[0].label(), "1");
        assert_eq!(groups[9].label(), "10");
    }

    #[test]
    fn test_linear_short_ungrouped() {
        // Lengths of 75 or less stay ungrouped even without the nogroup flag.
        let groups = BaseGroup::make_base_groups(50, 0, false, false);
        assert_eq!(groups.len(), 50);
    }

    #[test]
    fn test_linear_100() {
        let groups = BaseGroup::make_base_groups(100, 0, false, false);
        // 9 individual positions plus the grouped remainder.
        assert!(groups.len() < 75);
        // The first 9 groups each cover exactly one position.
        for (expected, group) in (0..9).zip(&groups) {
            assert_eq!(group.lower_count, expected);
            assert_eq!(group.upper_count, expected);
        }
    }

    #[test]
    fn test_exponential_150() {
        let groups = BaseGroup::make_base_groups(150, 0, false, true);
        // Positions 1..=9 are individual.
        assert_eq!(groups[0].label(), "1");
        assert_eq!(groups[8].label(), "9");
        // Since 150 > 75, grouping by 5 starts at position 10.
        assert_eq!(groups[9].label(), "10-14");
    }

    #[test]
    fn test_exponential_250() {
        let groups = BaseGroup::make_base_groups(250, 0, false, true);
        // Since 250 > 200, the interval becomes 10 from 1-based position 50,
        // which is stored as the 0-based lower bound 49.
        let from_fifty = groups
            .iter()
            .find(|candidate| candidate.lower_count == 49)
            .unwrap();
        assert_eq!(from_fifty.label(), "50-59");
    }

    #[test]
    fn test_get_linear_interval_100() {
        assert_eq!(get_linear_interval(100), 2);
    }

    #[test]
    fn test_get_linear_interval_300() {
        let interval = get_linear_interval(300);
        let remainder = 300 - 9;
        // A partially filled trailing bin counts as one more group.
        let group_count = 9 + remainder / interval + usize::from(remainder % interval != 0);
        assert!(group_count < 75);
    }
}