rustalign_aligner/
seed.rs1use crate::{Constraint, SeedPolicy, SeedType};
4use rustalign_common::Nuc;
5use rustalign_fmindex::FmIndex;
6
7#[derive(Debug, Clone)]
9pub struct Seed {
10 pub bwt_row: u64,
12
13 pub pos: usize,
15
16 pub read_pos: usize,
18
19 pub len: usize,
21
22 pub seed_type: SeedType,
24
25 pub constraint: Constraint,
27
28 pub hit_count: usize,
31}
32
33impl Seed {
34 pub fn new(bwt_row: u64, pos: usize, read_pos: usize, len: usize, seed_type: SeedType) -> Self {
36 Self {
37 bwt_row,
38 pos,
39 read_pos,
40 len,
41 seed_type,
42 constraint: Constraint::new(),
43 hit_count: 1,
44 }
45 }
46
47 pub fn with_hit_count(
49 bwt_row: u64,
50 pos: usize,
51 read_pos: usize,
52 len: usize,
53 seed_type: SeedType,
54 hit_count: usize,
55 ) -> Self {
56 Self {
57 bwt_row,
58 pos,
59 read_pos,
60 len,
61 seed_type,
62 constraint: Constraint::new(),
63 hit_count,
64 }
65 }
66}
67
68#[derive(Debug, Clone, Default)]
70pub struct SeedResults {
71 pub hits: Vec<Seed>,
73
74 pub seeds_searched: usize,
76}
77
78pub struct SeedAligner {
80 policy: SeedPolicy,
82
83 seed_sets: usize,
85}
86
87impl SeedAligner {
88 pub fn new(policy: SeedPolicy) -> Self {
90 Self {
91 policy,
92 seed_sets: 3, }
94 }
95
96 pub fn with_seed_sets(policy: SeedPolicy, seed_sets: usize) -> Self {
98 Self { policy, seed_sets }
99 }
100
101 pub fn search<I: FmIndex>(
107 &mut self,
108 index: &I,
109 read: &[Nuc],
110 ) -> rustalign_common::Result<SeedResults> {
111 let mut best_results = SeedResults::default();
112 let _read_len = read.len();
113
114 for set_idx in 0..self.seed_sets {
116 let offset = if set_idx == 0 {
121 0
122 } else {
123 (self.policy.seed_interval * set_idx) / (self.seed_sets)
124 };
125
126 let results = self.search_with_offset(index, read, offset)?;
127
128 if !results.hits.is_empty() {
130 for hit in results.hits {
132 let is_duplicate = best_results.hits.iter().any(|existing| {
134 existing.bwt_row == hit.bwt_row && existing.read_pos == hit.read_pos
135 });
136 if !is_duplicate {
137 best_results.hits.push(hit);
138 }
139 }
140 best_results.seeds_searched += results.seeds_searched;
141
142 if set_idx == 0 {
144 break;
145 }
146 }
147 }
148
149 best_results.hits.sort_by(|a, b| {
152 match a.hit_count.cmp(&b.hit_count) {
154 std::cmp::Ordering::Equal => {
155 a.read_pos.cmp(&b.read_pos)
157 }
158 other => other,
159 }
160 });
161
162 Ok(best_results)
163 }
164
165 fn search_with_offset<I: FmIndex>(
167 &mut self,
168 index: &I,
169 read: &[Nuc],
170 offset: usize,
171 ) -> rustalign_common::Result<SeedResults> {
172 let mut results = SeedResults::default();
173 let read_len = read.len();
174
175 let mut seed_pos = offset;
177 while seed_pos + self.policy.seed_len <= read_len {
178 let seed_seq = &read[seed_pos..seed_pos + self.policy.seed_len];
179
180 match index.exact_match(seed_seq) {
182 Ok(positions) => {
183 let hit_count = positions.len();
185
186 for bwt_row in positions.into_iter().take(self.policy.max_hits) {
188 let seed = Seed::with_hit_count(
189 bwt_row as u64,
190 bwt_row,
191 seed_pos,
192 self.policy.seed_len,
193 self.policy.seed_type,
194 hit_count,
195 );
196 results.hits.push(seed);
197 }
198 }
199 Err(_) => {
200 }
202 }
203
204 results.seeds_searched += 1;
205 seed_pos += self.policy.seed_interval;
206 }
207
208 Ok(results)
209 }
210}
211
212impl Default for SeedAligner {
213 fn default() -> Self {
214 Self::new(SeedPolicy::default())
215 }
216}
217
218#[cfg(test)]
219mod tests {
220 use super::*;
221
222 #[test]
223 fn test_seed_new() {
224 let seed = Seed::new(100, 100, 0, 22, SeedType::Exact);
225 assert_eq!(seed.bwt_row, 100);
226 assert_eq!(seed.pos, 100);
227 assert_eq!(seed.read_pos, 0);
228 assert_eq!(seed.len, 22);
229 assert_eq!(seed.hit_count, 1);
230 }
231
232 #[test]
233 fn test_seed_with_hit_count() {
234 let seed = Seed::with_hit_count(100, 100, 0, 22, SeedType::Exact, 5);
235 assert_eq!(seed.bwt_row, 100);
236 assert_eq!(seed.hit_count, 5);
237 }
238
239 #[test]
240 fn test_seed_aligner_new() {
241 let aligner = SeedAligner::new(SeedPolicy::new());
242 assert_eq!(aligner.policy.seed_len, 22);
243 }
244}