1#[cfg(feature = "simd_sse2")]
4use crate::sse2::*;
5
6#[cfg(feature = "simd_avx2")]
7use crate::avx2::*;
8
9#[cfg(feature = "simd_wasm")]
10use crate::simd128::*;
11
12#[cfg(feature = "simd_neon")]
13use crate::neon::*;
14
15use crate::scores::*;
16use crate::cigar::*;
17
18use std::{cmp, ptr, i16, alloc};
19use std::ops::RangeInclusive;
20
21#[cfg(feature = "mca")]
22use std::arch::asm;
23
24struct State<'a, M: Matrix> {
59 query: &'a PaddedBytes,
60 i: usize,
61 reference: &'a PaddedBytes,
62 j: usize,
63 min_size: usize,
64 max_size: usize,
65 matrix: &'a M,
66 gaps: Gaps,
67 x_drop: i32
68}
69
70struct StateProfile<'a, P: Profile> {
76 query: &'a PaddedBytes,
77 i: usize,
78 reference: &'a P,
79 j: usize,
80 min_size: usize,
81 max_size: usize,
82 x_drop: i32
83}
84
85pub struct Block<const TRACE: bool, const X_DROP: bool = false, const LOCAL_START: bool = false, const FREE_QUERY_START_GAPS: bool = false, const FREE_QUERY_END_GAPS: bool = false> {
90 res: AlignResult,
91 allocated: Allocated
92}
93
94macro_rules! align_core_gen {
95 ($fn_name:ident, $matrix_or_profile:tt, $state:tt, $place_block_right_fn:path, $place_block_down_fn:path) => {
96 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
97 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
98 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
99 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
100 #[allow(non_snake_case)]
101 unsafe fn $fn_name<M: $matrix_or_profile>(&mut self, mut state: $state<M>) {
102 let mut best_max = 0i32;
104 let mut best_argmax_i = 0usize;
105 let mut best_argmax_j = 0usize;
106
107 let mut prev_dir = Direction::Grow;
108 let mut dir = Direction::Grow;
109 let mut prev_size = 0;
110 let mut block_size = state.min_size;
111
112 let mut off = 0i32;
114 let mut prev_off;
115 let mut off_max = 0i32;
116
117 let mut y_drop_iter = 0;
119
120 let mut x_drop_iter = 0;
122
123 let mut i_ckpt = state.i;
124 let mut j_ckpt = state.j;
125 let mut off_ckpt = 0i32;
126
127 let mut D_corner = simd_set1_i16(MIN);
129
130 loop {
131 #[cfg(feature = "debug")]
132 {
133 println!("i: {}", state.i);
134 println!("j: {}", state.j);
135 println!("{:?}", dir);
136 println!("block size: {}", block_size);
137 }
138
139 prev_off = off;
140
141 let mut grow_D_max = simd_set1_i16(MIN);
144 let mut grow_D_argmax_i = simd_set1_i16(0);
145 let mut grow_D_argmax_j = simd_set1_i16(0);
146 let (D_max, D_argmax_i, D_argmax_j, mut right_max, mut down_max) = match dir {
147 Direction::Right => {
148 off = off_max;
149 #[cfg(feature = "debug")]
150 println!("off: {}", off);
151 let off_add = simd_set1_i16(clamp(prev_off - off));
152
153 if TRACE {
154 self.allocated.trace.add_block(state.i, state.j + block_size - STEP, STEP, block_size, true);
155 }
156
157 Self::just_offset(block_size, self.allocated.D_col.as_mut_ptr(), self.allocated.C_col.as_mut_ptr(), off_add);
159
160 let (D_max, D_argmax_i, D_argmax_j) = $place_block_right_fn(
163 &state,
164 state.query,
165 state.reference,
166 &mut self.allocated.trace,
167 state.i,
168 state.j + block_size - STEP,
169 STEP,
170 block_size,
171 self.allocated.D_col.as_mut_ptr(),
172 self.allocated.C_col.as_mut_ptr(),
173 self.allocated.temp_buf1.as_mut_ptr(),
174 self.allocated.temp_buf2.as_mut_ptr(),
175 if prev_dir == Direction::Down { simd_adds_i16(D_corner, off_add) } else { simd_set1_i16(MIN) },
176 clamp(-off + (ZERO as i32)),
177 true
178 );
179
180 let right_max = Self::prefix_max(self.allocated.D_col.as_ptr());
182
183 D_corner = Self::shift_and_offset(
185 block_size,
186 self.allocated.D_row.as_mut_ptr(),
187 self.allocated.R_row.as_mut_ptr(),
188 self.allocated.temp_buf1.as_mut_ptr(),
189 self.allocated.temp_buf2.as_mut_ptr(),
190 off_add
191 );
192 let down_max = Self::prefix_max(self.allocated.D_row.as_ptr());
194
195 (D_max, D_argmax_i, D_argmax_j, right_max, down_max)
196 },
197 Direction::Down => {
198 off = off_max;
199 #[cfg(feature = "debug")]
200 println!("off: {}", off);
201 let off_add = simd_set1_i16(clamp(prev_off - off));
202
203 if TRACE {
204 self.allocated.trace.add_block(state.i + block_size - STEP, state.j, block_size, STEP, false);
205 }
206
207 Self::just_offset(block_size, self.allocated.D_row.as_mut_ptr(), self.allocated.R_row.as_mut_ptr(), off_add);
209
210 let (D_max, D_argmax_i, D_argmax_j) = $place_block_down_fn(
213 &state,
214 state.reference,
215 state.query,
216 &mut self.allocated.trace,
217 state.j,
218 state.i + block_size - STEP,
219 STEP,
220 block_size,
221 self.allocated.D_row.as_mut_ptr(),
222 self.allocated.R_row.as_mut_ptr(),
223 self.allocated.temp_buf1.as_mut_ptr(),
224 self.allocated.temp_buf2.as_mut_ptr(),
225 if prev_dir == Direction::Right { simd_adds_i16(D_corner, off_add) } else { simd_set1_i16(MIN) },
226 clamp(-off + (ZERO as i32)),
227 false
228 );
229
230 let down_max = Self::prefix_max(self.allocated.D_row.as_ptr());
232
233 D_corner = Self::shift_and_offset(
235 block_size,
236 self.allocated.D_col.as_mut_ptr(),
237 self.allocated.C_col.as_mut_ptr(),
238 self.allocated.temp_buf1.as_mut_ptr(),
239 self.allocated.temp_buf2.as_mut_ptr(),
240 off_add
241 );
242 let right_max = Self::prefix_max(self.allocated.D_col.as_ptr());
244
245 (D_max, D_argmax_i, D_argmax_j, right_max, down_max)
246 },
247 Direction::Grow => {
248 D_corner = simd_set1_i16(MIN);
249 let grow_step = block_size - prev_size;
250
251 #[cfg(feature = "debug")]
252 println!("off: {}", off);
253 #[cfg(feature = "debug")]
254 println!("Grow down");
255
256 if TRACE {
257 self.allocated.trace.add_block(state.i + prev_size, state.j, prev_size, grow_step, false);
258 }
259
260 let (D_max1, D_argmax_i1, D_argmax_j1) = $place_block_down_fn(
263 &state,
264 state.reference,
265 state.query,
266 &mut self.allocated.trace,
267 state.j,
268 state.i + prev_size,
269 grow_step,
270 prev_size,
271 self.allocated.D_row.as_mut_ptr(),
272 self.allocated.R_row.as_mut_ptr(),
273 self.allocated.D_col.as_mut_ptr().add(prev_size),
274 self.allocated.C_col.as_mut_ptr().add(prev_size),
275 simd_set1_i16(MIN),
276 clamp(-off + (ZERO as i32)),
277 false
278 );
279
280 #[cfg(feature = "debug")]
281 println!("Grow right");
282
283 if TRACE {
284 self.allocated.trace.add_block(state.i, state.j + prev_size, grow_step, block_size, true);
285 }
286
287 let (D_max2, D_argmax_i2, D_argmax_j2) = $place_block_right_fn(
290 &state,
291 state.query,
292 state.reference,
293 &mut self.allocated.trace,
294 state.i,
295 state.j + prev_size,
296 grow_step,
297 block_size,
298 self.allocated.D_col.as_mut_ptr(),
299 self.allocated.C_col.as_mut_ptr(),
300 self.allocated.D_row.as_mut_ptr().add(prev_size),
301 self.allocated.R_row.as_mut_ptr().add(prev_size),
302 simd_set1_i16(MIN),
303 clamp(-off + (ZERO as i32)),
304 true
305 );
306
307 let right_max = Self::prefix_max(self.allocated.D_col.as_ptr());
308 let down_max = Self::prefix_max(self.allocated.D_row.as_ptr());
309 grow_D_max = D_max1;
310 grow_D_argmax_i = D_argmax_i1;
311 grow_D_argmax_j = D_argmax_j1;
312
313 let mut i = 0;
316 while i < block_size {
317 self.allocated.D_col_ckpt.set_vec(&self.allocated.D_col, i);
318 self.allocated.C_col_ckpt.set_vec(&self.allocated.C_col, i);
319 self.allocated.D_row_ckpt.set_vec(&self.allocated.D_row, i);
320 self.allocated.R_row_ckpt.set_vec(&self.allocated.R_row, i);
321 i += L;
322 }
323
324 if TRACE {
325 self.allocated.trace.save_ckpt();
326 }
327
328 (D_max2, D_argmax_i2, D_argmax_j2, right_max, down_max)
329 }
330 };
331
332 prev_dir = dir;
333 let D_max_max = if FREE_QUERY_END_GAPS {
334 simd_slow_extract_i16(D_max, state.query.len() % L)
337 } else {
338 simd_hmax_i16(D_max)
339 };
340 let grow_max = simd_hmax_i16(grow_D_max);
341 let max = cmp::max(D_max_max, grow_max);
345 off_max = off + (max as i32) - (ZERO as i32);
346 #[cfg(feature = "debug")]
347 println!("down max: {}, right max: {}", down_max, right_max);
348
349 y_drop_iter += 1;
350 let mut grow_no_max = dir == Direction::Grow;
352
353 if off_max > best_max {
354 if FREE_QUERY_END_GAPS {
355 let idx_j = simd_slow_extract_i16(D_argmax_j, state.query.len() % L) as usize;
358 best_argmax_i = state.query.len();
359 match dir {
360 Direction::Right => {
361 best_argmax_j = state.j + (block_size - STEP) + idx_j;
362 },
363 Direction::Grow => {
364 best_argmax_j = state.j + prev_size + idx_j;
365 },
366 _ => unreachable!(),
367 }
368 }
369
370 if X_DROP {
371 let lane_idx = simd_hargmax_i16(D_max, D_max_max);
374 let idx_i = simd_slow_extract_i16(D_argmax_i, lane_idx) as usize;
375 let idx_j = simd_slow_extract_i16(D_argmax_j, lane_idx) as usize;
376 let r = idx_i + lane_idx;
377 let c = (block_size - STEP) + idx_j;
378
379 match dir {
380 Direction::Right => {
381 best_argmax_i = state.i + r;
382 best_argmax_j = state.j + c;
383 },
384 Direction::Down => {
385 best_argmax_i = state.i + c;
386 best_argmax_j = state.j + r;
387 },
388 Direction::Grow => {
389 if D_max_max >= grow_max {
391 best_argmax_i = state.i + idx_i + lane_idx;
393 best_argmax_j = state.j + prev_size + idx_j;
394 } else {
395 let lane_idx = simd_hargmax_i16(grow_D_max, grow_max);
397 let idx_i = simd_slow_extract_i16(grow_D_argmax_i, lane_idx) as usize;
398 let idx_j = simd_slow_extract_i16(grow_D_argmax_j, lane_idx) as usize;
399 best_argmax_i = state.i + prev_size + idx_j;
400 best_argmax_j = state.j + idx_i + lane_idx;
401 }
402 }
403 }
404 }
405
406 if block_size < state.max_size {
407 i_ckpt = state.i;
410 j_ckpt = state.j;
411 off_ckpt = off;
412
413 let mut i = 0;
414 while i < block_size {
415 self.allocated.D_col_ckpt.set_vec(&self.allocated.D_col, i);
416 self.allocated.C_col_ckpt.set_vec(&self.allocated.C_col, i);
417 self.allocated.D_row_ckpt.set_vec(&self.allocated.D_row, i);
418 self.allocated.R_row_ckpt.set_vec(&self.allocated.R_row, i);
419 i += L;
420 }
421
422 if TRACE {
423 self.allocated.trace.save_ckpt();
424 }
425
426 grow_no_max = false;
427 }
428
429 best_max = off_max;
430
431 y_drop_iter = 0;
432 }
433
434 if X_DROP {
435 if off_max < best_max - state.x_drop {
436 if x_drop_iter < X_DROP_ITER - 1 {
437 x_drop_iter += 1;
438 } else {
439 break;
441 }
442 } else {
443 x_drop_iter = 0;
444 }
445 }
446
447 if state.i + block_size > state.query.len() && state.j + block_size > state.reference.len() {
448 break;
450 }
451
452 if state.j + block_size > state.reference.len() {
454 state.i += STEP;
455 dir = Direction::Down;
456 continue;
457 }
458 if state.i + block_size > state.query.len() {
459 state.j += STEP;
460 dir = Direction::Right;
461 continue;
462 }
463
464 let next_size = block_size * 2;
472 if next_size <= state.max_size {
473 if y_drop_iter > (block_size / STEP) - 1 || grow_no_max {
476 prev_size = block_size;
478 block_size = next_size;
479 dir = Direction::Grow;
480
481 state.i = i_ckpt;
483 state.j = j_ckpt;
484 off = off_ckpt;
485
486 let mut i = 0;
487 while i < prev_size {
488 self.allocated.D_col.set_vec(&self.allocated.D_col_ckpt, i);
489 self.allocated.C_col.set_vec(&self.allocated.C_col_ckpt, i);
490 self.allocated.D_row.set_vec(&self.allocated.D_row_ckpt, i);
491 self.allocated.R_row.set_vec(&self.allocated.R_row_ckpt, i);
492 i += L;
493 }
494
495 if TRACE {
496 self.allocated.trace.restore_ckpt();
497 }
498
499 y_drop_iter = 0;
500 continue;
501 }
502 }
503
504 if SHRINK && block_size > state.min_size && y_drop_iter == 0 {
506 let shrink_max = cmp::max(
507 Self::suffix_max(self.allocated.D_row.as_ptr(), block_size),
508 Self::suffix_max(self.allocated.D_col.as_ptr(), block_size)
509 );
510 if shrink_max >= max {
511 prev_dir = Direction::Grow;
513
514 block_size /= 2;
515 let mut i = 0;
516 while i < block_size {
517 self.allocated.D_col.copy_vec(i, i + block_size);
518 self.allocated.C_col.copy_vec(i, i + block_size);
519 self.allocated.D_row.copy_vec(i, i + block_size);
520 self.allocated.R_row.copy_vec(i, i + block_size);
521 i += L;
522 }
523
524 state.i += block_size;
525 state.j += block_size;
526
527 i_ckpt = state.i;
528 j_ckpt = state.j;
529 off_ckpt = off;
530
531 let mut i = 0;
532 while i < block_size {
533 self.allocated.D_col_ckpt.set_vec(&self.allocated.D_col, i);
534 self.allocated.C_col_ckpt.set_vec(&self.allocated.C_col, i);
535 self.allocated.D_row_ckpt.set_vec(&self.allocated.D_row, i);
536 self.allocated.R_row_ckpt.set_vec(&self.allocated.R_row, i);
537 i += L;
538 }
539
540 right_max = Self::prefix_max(self.allocated.D_col.as_ptr());
541 down_max = Self::prefix_max(self.allocated.D_row.as_ptr());
542
543 if TRACE {
544 self.allocated.trace.save_ckpt();
545 }
546
547 y_drop_iter = 0;
548 }
549 }
550
551 if down_max > right_max {
553 state.i += STEP;
554 dir = Direction::Down;
555 } else {
556 state.j += STEP;
557 dir = Direction::Right;
558 }
559 }
560
561 #[cfg(any(feature = "debug", feature = "debug_size"))]
562 {
563 println!("query size: {}, reference size: {}", state.query.len(), state.reference.len());
564 println!("end block size: {}", block_size);
565 }
566
567 self.res = if X_DROP || FREE_QUERY_END_GAPS {
568 AlignResult {
569 score: best_max,
570 query_idx: best_argmax_i,
571 reference_idx: best_argmax_j
572 }
573 } else {
574 debug_assert!(state.i <= state.query.len());
575 let score = off + match dir {
576 Direction::Right | Direction::Grow => {
577 let idx = state.query.len() - state.i;
578 debug_assert!(idx < block_size);
579 (self.allocated.D_col.get(idx) as i32) - (ZERO as i32)
580 },
581 Direction::Down => {
582 let idx = state.reference.len() - state.j;
583 debug_assert!(idx < block_size);
584 (self.allocated.D_row.get(idx) as i32) - (ZERO as i32)
585 }
586 };
587 AlignResult {
588 score,
589 query_idx: state.query.len(),
590 reference_idx: state.reference.len()
591 }
592 };
593 }
594 };
595}
596
597macro_rules! place_block_profile_gen {
613 ($fn_name:ident, $query: ident, $query_type: ty, $reference: ident, $reference_type: ty, $q: ident, $r: ident, $right: expr) => {
614 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
615 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
616 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
617 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
618 #[allow(non_snake_case)]
619 unsafe fn $fn_name<P: Profile>(_state: &StateProfile<P>,
620 $query: $query_type,
621 $reference: $reference_type,
622 trace: &mut Trace,
623 start_i: usize,
624 start_j: usize,
625 width: usize,
626 height: usize,
627 D_col: *mut i16,
628 C_col: *mut i16,
629 D_row: *mut i16,
630 R_row: *mut i16,
631 mut D_corner: Simd,
632 relative_zero: i16,
633 _right: bool) -> (Simd, Simd, Simd) {
634 let gap_extend = simd_set1_i16($r.get_gap_extend() as i16);
635 let (gap_extend_all, prefix_scan_consts) = get_prefix_scan_consts(gap_extend);
636 let mut D_max = simd_set1_i16(MIN);
637 let mut D_argmax_i = simd_set1_i16(0);
638 let mut D_argmax_j = simd_set1_i16(0);
639
640 let mut idx = 0;
641 let mut gap_open_C = simd_set1_i16(MIN);
642 let mut gap_close_C = simd_set1_i16(MIN);
643 let mut gap_open_R = simd_set1_i16(MIN);
644 let mut gap_close_R = simd_set1_i16(MIN);
645
646 if width == 0 || height == 0 {
647 return (D_max, D_argmax_i, D_argmax_j);
648 }
649
650 for j in 0..width {
652 let mut R01 = simd_set1_i16(MIN);
653 let mut D11 = simd_set1_i16(MIN);
654 let mut R11 = simd_set1_i16(MIN);
655 let mut prev_trace_R = simd_set1_i16(0);
656
657 if $right {
658 idx = start_j + j;
659 gap_open_C = $r.get_gap_open_right_C(idx);
660 gap_close_C = $r.get_gap_close_right_C(idx);
661 gap_open_R = $r.get_gap_open_right_R(idx);
662 }
663
664 let mut i = 0;
665 while i < height {
666 let D10 = simd_load(D_col.add(i) as _);
667 let C10 = simd_load(C_col.add(i) as _);
668 let D00 = simd_sl_i16!(D10, D_corner, 1);
669 D_corner = D10;
670
671 if !$right {
672 idx = start_i + i;
673 gap_open_C = $r.get_gap_open_down_R(idx);
674 gap_open_R = $r.get_gap_open_down_C(idx);
675 gap_close_R = $r.get_gap_close_down_C(idx);
676 }
677
678 let scores = if $right {
679 $r.get_scores_pos(idx, halfsimd_loadu($q.as_ptr(start_i + i) as _), true)
680 } else {
681 $r.get_scores_aa(idx, $q.get(start_j + j), false)
682 };
683 D11 = simd_adds_i16(D00, scores);
684 if (!LOCAL_START && start_i + i == 0 && start_j + j == 0) || (FREE_QUERY_START_GAPS && $right && start_i + i == 0) {
685 D11 = simd_insert_i16!(D11, relative_zero, 0);
686 }
687
688 if LOCAL_START {
689 D11 = simd_max_i16(D11, simd_set1_i16(relative_zero));
690 }
691
692 let C11_open = simd_adds_i16(D10, simd_adds_i16(gap_open_C, gap_extend));
693 let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), C11_open);
694 let C11_end = if $right { simd_adds_i16(C11, gap_close_C) } else { C11 };
695 D11 = simd_max_i16(D11, C11_end);
696 let D11_open = simd_adds_i16(D11, gap_open_R);
699 R11 = simd_prefix_scan_i16(D11_open, gap_extend, prefix_scan_consts);
700 R11 = simd_max_i16(R11, simd_adds_i16(simd_broadcasthi_i16(R01), gap_extend_all));
703 let R11_end = if $right { R11 } else { simd_adds_i16(R11, gap_close_R) };
705 D11 = simd_max_i16(D11, R11_end);
706 R01 = R11;
707
708 #[cfg(feature = "debug")]
709 {
710 print!("s: ");
711 simd_dbg_i16(scores);
712 print!("D00: ");
713 simd_dbg_i16(simd_subs_i16(D00, simd_set1_i16(ZERO)));
714 print!("C11: ");
715 simd_dbg_i16(simd_subs_i16(C11, simd_set1_i16(ZERO)));
716 print!("R11: ");
717 simd_dbg_i16(simd_subs_i16(R11, simd_set1_i16(ZERO)));
718 print!("D11: ");
719 simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO)));
720 }
721
722 if TRACE {
723 let trace_D_C = simd_cmpeq_i16(D11, C11_end);
724 let trace_D_R = simd_cmpeq_i16(D11, R11_end);
725 #[cfg(feature = "debug")]
726 {
727 print!("D_C: ");
728 simd_dbg_i16(trace_D_C);
729 print!("D_R: ");
730 simd_dbg_i16(trace_D_R);
731 }
732 let mask = simd_set1_i16(0xFF00u16 as i16);
734 let trace_data = simd_movemask_i8(simd_blend_i8(trace_D_C, trace_D_R, mask));
735 let temp_trace_R = simd_cmpeq_i16(R11, D11_open);
736 let trace_R = simd_sl_i16!(temp_trace_R, prev_trace_R, 1);
737 let trace_data2 = simd_movemask_i8(simd_blend_i8(simd_cmpeq_i16(C11, C11_open), trace_R, mask));
738 prev_trace_R = temp_trace_R;
739
740 if LOCAL_START {
741 let zero_mask = simd_cmpeq_i16(D11, simd_set1_i16(relative_zero));
742 trace.add_zero_mask(simd_movemask_i8(zero_mask) as TraceType);
743 }
744
745 trace.add_trace(trace_data as TraceType, trace_data2 as TraceType);
746 }
747
748 D_max = simd_max_i16(D_max, D11);
749
750 if X_DROP || (FREE_QUERY_END_GAPS && start_i + i + L > $query.len()) {
751 let mask = simd_cmpeq_i16(D_max, D11);
755 D_argmax_i = simd_blend_i8(D_argmax_i, simd_set1_i16(i as i16), mask);
756 D_argmax_j = simd_blend_i8(D_argmax_j, simd_set1_i16(j as i16), mask);
757 }
758
759 simd_store(D_col.add(i) as _, D11);
760 simd_store(C_col.add(i) as _, C11);
761 i += L;
762 }
763
764 D_corner = simd_set1_i16(MIN);
765
766 ptr::write(D_row.add(j), simd_extract_i16!(D11, L - 1));
767 ptr::write(R_row.add(j), simd_extract_i16!(R11, L - 1));
768
769 if !X_DROP && !FREE_QUERY_END_GAPS && start_i + height > $query.len()
770 && start_j + j >= $reference.len() {
771 if TRACE {
772 trace.add_trace_idx((width - 1 - j) * (height / L));
775 }
776 break;
777 }
778 }
779
780 (D_max, D_argmax_i, D_argmax_j)
781 }
782 };
783}
784
785const STEP: usize = 8;
788const X_DROP_ITER: usize = 2; const SHRINK: bool = true; const SHRINK_SUFFIX_LEN: usize = STEP / 4;
791impl<const TRACE: bool, const X_DROP: bool, const LOCAL_START: bool, const FREE_QUERY_START_GAPS: bool, const FREE_QUERY_END_GAPS: bool> Block<{ TRACE }, { X_DROP }, { LOCAL_START }, { FREE_QUERY_START_GAPS }, { FREE_QUERY_END_GAPS }> {
792 pub fn new(query_len: usize, reference_len: usize, max_size: usize) -> Self {
799 assert!(max_size.is_power_of_two(), "Block size must be a power of two!");
800
801 Self {
802 res: AlignResult { score: 0, query_idx: 0, reference_idx: 0 },
803 allocated: Allocated::new(query_len, reference_len, max_size, TRACE, LOCAL_START, FREE_QUERY_START_GAPS)
804 }
805 }
806
807 pub fn align<M: Matrix>(&mut self, query: &PaddedBytes, reference: &PaddedBytes, matrix: &M, gaps: Gaps, size: RangeInclusive<usize>, x_drop: i32) {
848 assert!(gaps.open < 0 && gaps.extend < 0, "Gap costs must be negative!");
850 assert!(gaps.open < gaps.extend, "Gap open must cost more than gap extend!");
853 let min_size = if *size.start() < L { L } else { *size.start() };
854 let max_size = if *size.end() < L { L } else { *size.end() };
855 assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!");
856 assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!");
857 if X_DROP {
858 assert!(x_drop >= 0, "X-drop threshold amount must be nonnegative!");
859 }
860 assert!(!LOCAL_START || !FREE_QUERY_START_GAPS, "Cannot set both LOCAL_START and FREE_QUERY_START_GAPS!");
861 assert!(!X_DROP || !FREE_QUERY_END_GAPS, "Cannot set both X_DROP and FREE_QUERY_END_GAPS!");
862 assert!(!FREE_QUERY_END_GAPS || min_size > query.len(), "Min block size must be larger than the query length for FREE_QUERY_END_GAPS!");
863
864 unsafe { self.allocated.clear(query.len(), reference.len(), max_size, TRACE); }
865
866 let s = State {
867 query,
868 i: 0,
869 reference,
870 j: 0,
871 min_size,
872 max_size,
873 matrix,
874 gaps,
875 x_drop
876 };
877 unsafe { self.align_core(s); }
878 }
879
880 pub fn align_exp<M: Matrix>(&mut self, query: &PaddedBytes, reference: &PaddedBytes, matrix: &M, gaps: Gaps, size: RangeInclusive<usize>, x_drop: i32, target_score: i32) -> Option<usize> {
885 let mut min_size = if *size.start() < L { L } else { *size.start() };
886 let max_size = if *size.end() < L { L } else { *size.end() };
887 assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!");
888 assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!");
889
890 while min_size <= max_size {
891 self.align(query, reference, matrix, gaps, min_size..=max_size, x_drop);
892 let curr_score = self.res().score;
893
894 if curr_score >= target_score {
895 return Some(min_size);
896 }
897
898 min_size *= 2;
899 }
900
901 None
902 }
903
904 pub fn align_profile<P: Profile>(&mut self, query: &PaddedBytes, profile: &P, size: RangeInclusive<usize>, x_drop: i32) {
943 assert!(profile.get_gap_extend() < 0, "Gap extend cost must be negative!");
945 let min_size = if *size.start() < L { L } else { *size.start() };
946 let max_size = if *size.end() < L { L } else { *size.end() };
947 assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!");
948 assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!");
949 if X_DROP {
950 assert!(x_drop >= 0, "X-drop threshold amount must be nonnegative!");
951 }
952 assert!(!LOCAL_START || !FREE_QUERY_START_GAPS, "Cannot set both LOCAL_START and FREE_QUERY_START_GAPS!");
953 assert!(!X_DROP || !FREE_QUERY_END_GAPS, "Cannot set both X_DROP and FREE_QUERY_END_GAPS!");
954 assert!(!FREE_QUERY_END_GAPS || min_size > query.len(), "Min block size must be larger than the query length for FREE_QUERY_END_GAPS!");
955
956 unsafe { self.allocated.clear(query.len(), profile.len(), max_size, TRACE); }
957
958 let s = StateProfile {
959 query,
960 i: 0,
961 reference: profile,
962 j: 0,
963 min_size,
964 max_size,
965 x_drop
966 };
967 unsafe { self.align_profile_core(s); }
968 }
969
970 pub fn align_profile_exp<P: Profile>(&mut self, query: &PaddedBytes, profile: &P, size: RangeInclusive<usize>, x_drop: i32, target_score: i32) -> Option<usize> {
975 let mut min_size = if *size.start() < L { L } else { *size.start() };
976 let max_size = if *size.end() < L { L } else { *size.end() };
977 assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!");
978 assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!");
979
980 while min_size <= max_size {
981 self.align_profile(query, profile, min_size..=max_size, x_drop);
982 let curr_score = self.res().score;
983
984 if curr_score >= target_score {
985 return Some(min_size);
986 }
987
988 min_size *= 2;
989 }
990
991 None
992 }
993
994 align_core_gen!(align_core, Matrix, State, Self::place_block, Self::place_block);
995 align_core_gen!(align_profile_core, Profile, StateProfile, Self::place_block_profile_right, Self::place_block_profile_down);
996
997 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
998 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
999 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1000 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1001 #[allow(non_snake_case)]
1002 #[inline]
1003 unsafe fn just_offset(block_size: usize, buf1: *mut i16, buf2: *mut i16, off_add: Simd) {
1004 let mut i = 0;
1005 while i < block_size {
1006 let a = simd_adds_i16(simd_load(buf1.add(i) as _), off_add);
1007 let b = simd_adds_i16(simd_load(buf2.add(i) as _), off_add);
1008 simd_store(buf1.add(i) as _, a);
1009 simd_store(buf2.add(i) as _, b);
1010 i += L;
1011 }
1012 }
1013
1014 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1015 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1016 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1017 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1018 #[allow(non_snake_case)]
1019 #[inline]
1020 unsafe fn prefix_max(buf: *const i16) -> i16 {
1021 simd_prefix_hmax_i16!(simd_load(buf as _), STEP)
1022 }
1023
1024 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1025 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1026 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1027 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1028 #[allow(non_snake_case)]
1029 #[inline]
1030 unsafe fn suffix_max(buf: *const i16, buf_len: usize) -> i16 {
1031 simd_suffix_hmax_i16!(simd_load(buf.add(buf_len - L) as _), SHRINK_SUFFIX_LEN)
1032 }
1033
1034 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1035 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1036 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1037 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1038 #[allow(non_snake_case)]
1039 #[inline]
1040 unsafe fn shift_and_offset(block_size: usize, buf1: *mut i16, buf2: *mut i16, temp_buf1: *mut i16, temp_buf2: *mut i16, off_add: Simd) -> Simd {
1041 let mut curr1 = simd_adds_i16(simd_load(buf1 as _), off_add);
1042 let D_corner = simd_set1_i16(simd_extract_i16!(curr1, STEP - 1));
1043 let mut curr2 = simd_adds_i16(simd_load(buf2 as _), off_add);
1044
1045 let mut i = 0;
1046 while i < block_size - L {
1047 let next1 = simd_adds_i16(simd_load(buf1.add(i + L) as _), off_add);
1048 let next2 = simd_adds_i16(simd_load(buf2.add(i + L) as _), off_add);
1049 simd_store(buf1.add(i) as _, simd_step(next1, curr1));
1050 simd_store(buf2.add(i) as _, simd_step(next2, curr2));
1051 curr1 = next1;
1052 curr2 = next2;
1053 i += L;
1054 }
1055
1056 let next1 = simd_load(temp_buf1 as _);
1057 let next2 = simd_load(temp_buf2 as _);
1058 simd_store(buf1.add(block_size - L) as _, simd_step(next1, curr1));
1059 simd_store(buf2.add(block_size - L) as _, simd_step(next2, curr2));
1060 D_corner
1061 }
1062
1063 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1079 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1080 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1081 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1082 #[allow(non_snake_case)]
1083 unsafe fn place_block<M: Matrix>(state: &State<M>,
1084 query: &PaddedBytes,
1085 reference: &PaddedBytes,
1086 trace: &mut Trace,
1087 start_i: usize,
1088 start_j: usize,
1089 width: usize,
1090 height: usize,
1091 D_col: *mut i16,
1092 C_col: *mut i16,
1093 D_row: *mut i16,
1094 R_row: *mut i16,
1095 mut D_corner: Simd,
1096 relative_zero: i16,
1097 right: bool) -> (Simd, Simd, Simd) {
1098 let gap_open = simd_set1_i16(state.gaps.open as i16);
1099 let gap_extend = simd_set1_i16(state.gaps.extend as i16);
1100 let (gap_extend_all, prefix_scan_consts) = get_prefix_scan_consts(gap_extend);
1101 let mut D_max = simd_set1_i16(MIN);
1102 let mut D_argmax_i = simd_set1_i16(0);
1103 let mut D_argmax_j = simd_set1_i16(0);
1104
1105 if width == 0 || height == 0 {
1106 return (D_max, D_argmax_i, D_argmax_j);
1107 }
1108
1109 for j in 0..width {
1111 let mut R01 = simd_set1_i16(MIN);
1112 let mut D11 = simd_set1_i16(MIN);
1113 let mut R11 = simd_set1_i16(MIN);
1114 let mut prev_trace_R = simd_set1_i16(0);
1115
1116 let c = reference.get(start_j + j);
1117
1118 let mut i = 0;
1119 while i < height {
1120 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))]
1121 asm!("# LLVM-MCA-BEGIN place_block inner loop", options(nomem, nostack, preserves_flags));
1122
1123 let D10 = simd_load(D_col.add(i) as _);
1124 let C10 = simd_load(C_col.add(i) as _);
1125 let D00 = simd_sl_i16!(D10, D_corner, 1);
1126 D_corner = D10;
1127
1128 let scores = state.matrix.get_scores(c, halfsimd_loadu(query.as_ptr(start_i + i) as _), right);
1129 D11 = simd_adds_i16(D00, scores);
1130 if (!LOCAL_START && start_i + i == 0 && start_j + j == 0) || (FREE_QUERY_START_GAPS && right && start_i + i == 0) {
1131 D11 = simd_insert_i16!(D11, relative_zero, 0);
1132 }
1133
1134 if LOCAL_START {
1135 D11 = simd_max_i16(D11, simd_set1_i16(relative_zero));
1136 }
1137
1138 let C11_open = simd_adds_i16(D10, gap_open);
1139 let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), C11_open);
1140 D11 = simd_max_i16(D11, C11);
1141 let D11_open = simd_adds_i16(D11, simd_subs_i16(gap_open, gap_extend));
1144 R11 = simd_prefix_scan_i16(D11_open, gap_extend, prefix_scan_consts);
1145 R11 = simd_max_i16(R11, simd_adds_i16(simd_broadcasthi_i16(R01), gap_extend_all));
1148 D11 = simd_max_i16(D11, R11);
1150 R01 = R11;
1151
1152 #[cfg(feature = "debug")]
1153 {
1154 print!("s: ");
1155 simd_dbg_i16(scores);
1156 print!("D00: ");
1157 simd_dbg_i16(simd_subs_i16(D00, simd_set1_i16(ZERO)));
1158 print!("C11: ");
1159 simd_dbg_i16(simd_subs_i16(C11, simd_set1_i16(ZERO)));
1160 print!("R11: ");
1161 simd_dbg_i16(simd_subs_i16(R11, simd_set1_i16(ZERO)));
1162 print!("D11: ");
1163 simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO)));
1164 }
1165
1166 if TRACE {
1167 let trace_D_C = simd_cmpeq_i16(D11, C11);
1168 let trace_D_R = simd_cmpeq_i16(D11, R11);
1169 #[cfg(feature = "debug")]
1170 {
1171 print!("D_C: ");
1172 simd_dbg_i16(trace_D_C);
1173 print!("D_R: ");
1174 simd_dbg_i16(trace_D_R);
1175 }
1176 let mask = simd_set1_i16(0xFF00u16 as i16);
1178 let trace_data = simd_movemask_i8(simd_blend_i8(trace_D_C, trace_D_R, mask));
1179 let temp_trace_R = simd_cmpeq_i16(R11, D11_open);
1180 let trace_R = simd_sl_i16!(temp_trace_R, prev_trace_R, 1);
1181 let trace_data2 = simd_movemask_i8(simd_blend_i8(simd_cmpeq_i16(C11, C11_open), trace_R, mask));
1182 prev_trace_R = temp_trace_R;
1183
1184 if LOCAL_START {
1185 let zero_mask = simd_cmpeq_i16(D11, simd_set1_i16(relative_zero));
1186 trace.add_zero_mask(simd_movemask_i8(zero_mask) as TraceType);
1187 }
1188
1189 trace.add_trace(trace_data as TraceType, trace_data2 as TraceType);
1190 }
1191
1192 D_max = simd_max_i16(D_max, D11);
1193
1194 if X_DROP || (FREE_QUERY_END_GAPS && start_i + i + L > query.len()) {
1195 let mask = simd_cmpeq_i16(D_max, D11);
1199 D_argmax_i = simd_blend_i8(D_argmax_i, simd_set1_i16(i as i16), mask);
1200 D_argmax_j = simd_blend_i8(D_argmax_j, simd_set1_i16(j as i16), mask);
1201 }
1202
1203 simd_store(D_col.add(i) as _, D11);
1204 simd_store(C_col.add(i) as _, C11);
1205 i += L;
1206
1207 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))]
1208 asm!("# LLVM-MCA-END", options(nomem, nostack, preserves_flags));
1209 }
1210
1211 D_corner = simd_set1_i16(MIN);
1212
1213 ptr::write(D_row.add(j), simd_extract_i16!(D11, L - 1));
1214 ptr::write(R_row.add(j), simd_extract_i16!(R11, L - 1));
1215
1216 if !X_DROP && !FREE_QUERY_END_GAPS && start_i + height > query.len()
1217 && start_j + j >= reference.len() {
1218 if TRACE {
1219 trace.add_trace_idx((width - 1 - j) * (height / L));
1222 }
1223 break;
1224 }
1225 }
1226
1227 (D_max, D_argmax_i, D_argmax_j)
1228 }
1229
1230 place_block_profile_gen!(place_block_profile_right, query, &PaddedBytes, reference, &P, query, reference, true);
1231 place_block_profile_gen!(place_block_profile_down, reference, &P, query, &PaddedBytes, query, reference, false);
1232
1233 #[inline]
1235 pub fn res(&self) -> AlignResult {
1236 self.res
1237 }
1238
1239 #[inline]
1241 pub fn trace(&self) -> &Trace {
1242 assert!(TRACE);
1243 &self.allocated.trace
1244 }
1245}
1246
1247#[allow(non_snake_case)]
1252struct Allocated {
1253 pub trace: Trace,
1254
1255 pub D_col: Aligned,
1257 pub C_col: Aligned,
1258 pub D_row: Aligned,
1259 pub R_row: Aligned,
1260
1261 pub D_col_ckpt: Aligned,
1263 pub C_col_ckpt: Aligned,
1264 pub D_row_ckpt: Aligned,
1265 pub R_row_ckpt: Aligned,
1266
1267 pub temp_buf1: Aligned,
1270 pub temp_buf2: Aligned,
1271
1272 query_len: usize,
1273 reference_len: usize,
1274 max_size: usize,
1275 trace_flag: bool
1276}
1277
1278impl Allocated {
1279 #[allow(non_snake_case)]
1280 fn new(query_len: usize, reference_len: usize, max_size: usize, trace_flag: bool, local_start: bool, free_query_start_gaps: bool) -> Self {
1281 unsafe {
1282 let trace = if trace_flag {
1283 Trace::new(query_len, reference_len, max_size, local_start, free_query_start_gaps)
1284 } else {
1285 Trace::new(0, 0, 0, false, false)
1286 };
1287 let D_col = Aligned::new(max_size);
1288 let C_col = Aligned::new(max_size);
1289 let D_row = Aligned::new(max_size);
1290 let R_row = Aligned::new(max_size);
1291 let D_col_ckpt = Aligned::new(max_size);
1292 let C_col_ckpt = Aligned::new(max_size);
1293 let D_row_ckpt = Aligned::new(max_size);
1294 let R_row_ckpt = Aligned::new(max_size);
1295 let temp_buf1 = Aligned::new(L);
1296 let temp_buf2 = Aligned::new(L);
1297
1298 Self {
1299 trace,
1300 D_col,
1301 C_col,
1302 D_row,
1303 R_row,
1304 D_col_ckpt,
1305 C_col_ckpt,
1306 D_row_ckpt,
1307 R_row_ckpt,
1308 temp_buf1,
1309 temp_buf2,
1310 query_len,
1311 reference_len,
1312 max_size,
1313 trace_flag
1314 }
1315 }
1316 }
1317
1318 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1319 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1320 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1321 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1322 unsafe fn clear(&mut self, query_len: usize, reference_len: usize, max_size: usize, trace_flag: bool) {
1323 assert!(query_len + reference_len <= self.query_len + self.reference_len);
1325 assert!(max_size <= self.max_size);
1326 assert_eq!(trace_flag, self.trace_flag);
1327
1328 self.trace.clear(query_len, reference_len);
1329 self.D_col.clear(max_size);
1330 self.C_col.clear(max_size);
1331 self.D_row.clear(max_size);
1332 self.R_row.clear(max_size);
1333 self.D_col_ckpt.clear(max_size);
1334 self.C_col_ckpt.clear(max_size);
1335 self.D_row_ckpt.clear(max_size);
1336 self.R_row_ckpt.clear(max_size);
1337 self.temp_buf1.clear(L);
1338 self.temp_buf2.clear(L);
1339 }
1340}
1341
1342#[derive(Clone)]
1344pub struct Trace {
1345 trace: Vec<TraceType>,
1346 trace2: Vec<TraceType>,
1347 right: Vec<u64>,
1348 block_start: Vec<u32>,
1349 block_size: Vec<u16>,
1350 zero_mask: Vec<TraceType>,
1351 trace_idx: usize,
1352 block_idx: usize,
1353 ckpt_trace_idx: usize,
1354 ckpt_block_idx: usize,
1355 query_len: usize,
1356 reference_len: usize,
1357 local_start: bool,
1358 free_query_start_gaps: bool
1359}
1360
1361impl Trace {
1362 #[inline]
1363 fn new(query_len: usize, reference_len: usize, max_size: usize, local_start: bool, free_query_start_gaps: bool) -> Self {
1364 let len = query_len + reference_len + 2;
1365 let trace = vec![0 as TraceType; (max_size / L) * (len + max_size * 2)];
1366 let trace2 = vec![0 as TraceType; (max_size / L) * (len + max_size * 2)];
1367 let right = vec![0u64; div_ceil(len, 64)];
1368 let block_start = vec![0u32; len * 2];
1369 let block_size = vec![0u16; len * 2];
1370 let zero_mask = if local_start {
1371 vec![0 as TraceType; (max_size / L) * (len + max_size * 2)]
1372 } else {
1373 vec![]
1374 };
1375
1376 Self {
1377 trace,
1378 trace2,
1379 right,
1380 block_start,
1381 block_size,
1382 zero_mask,
1383 trace_idx: 0,
1384 block_idx: 0,
1385 ckpt_trace_idx: 0,
1386 ckpt_block_idx: 0,
1387 query_len,
1388 reference_len,
1389 local_start,
1390 free_query_start_gaps,
1391 }
1392 }
1393
1394 #[inline]
1395 fn clear(&mut self, query_len: usize, reference_len: usize) {
1396 self.right.fill(0);
1398 self.trace_idx = 0;
1399 self.block_idx = 0;
1400 self.ckpt_trace_idx = 0;
1401 self.ckpt_block_idx = 0;
1402 self.query_len = query_len;
1403 self.reference_len = reference_len;
1404 }
1405
1406 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1407 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1408 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1409 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1410 #[inline]
1411 unsafe fn add_trace(&mut self, t: TraceType, t2: TraceType) {
1412 debug_assert!(self.trace_idx < self.trace.len());
1413 store_trace(self.trace.as_mut_ptr().add(self.trace_idx), t);
1414 store_trace(self.trace2.as_mut_ptr().add(self.trace_idx), t2);
1415 self.trace_idx += 1;
1416 }
1417
1418 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1419 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1420 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1421 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1422 #[inline]
1423 unsafe fn add_zero_mask(&mut self, mask: TraceType) {
1424 store_trace(self.zero_mask.as_mut_ptr().add(self.trace_idx), mask);
1425 }
1426
1427 #[inline]
1428 fn add_block(&mut self, i: usize, j: usize, width: usize, height: usize, right: bool) {
1429 debug_assert!(self.block_idx * 2 < self.block_start.len());
1430 unsafe {
1431 *self.block_start.as_mut_ptr().add(self.block_idx * 2) = i as u32;
1432 *self.block_start.as_mut_ptr().add(self.block_idx * 2 + 1) = j as u32;
1433 *self.block_size.as_mut_ptr().add(self.block_idx * 2) = height as u16;
1434 *self.block_size.as_mut_ptr().add(self.block_idx * 2 + 1) = width as u16;
1435
1436 let a = self.block_idx / 64;
1437 let b = self.block_idx % 64;
1438 let v = *self.right.as_ptr().add(a) & !(1 << b); *self.right.as_mut_ptr().add(a) = v | ((right as u64) << b);
1440
1441 self.block_idx += 1;
1442 }
1443 }
1444
1445 #[inline]
1446 fn add_trace_idx(&mut self, add: usize) {
1447 self.trace_idx += add;
1448 }
1449
1450 #[inline]
1451 fn save_ckpt(&mut self) {
1452 self.ckpt_trace_idx = self.trace_idx;
1453 self.ckpt_block_idx = self.block_idx;
1454 }
1455
1456 #[inline]
1459 fn restore_ckpt(&mut self) {
1460 self.trace_idx = self.ckpt_trace_idx;
1461 self.block_idx = self.ckpt_block_idx;
1462 }
1463
1464 pub fn cigar(&self, i: usize, j: usize, cigar: &mut Cigar) {
1470 self.cigar_core::<false>(i, j, None, None, cigar);
1471 }
1472
1473 pub fn cigar_eq(&self, query: &PaddedBytes, reference: &PaddedBytes, i: usize, j: usize, cigar: &mut Cigar) {
1479 self.cigar_core::<true>(i, j, Some(query), Some(reference), cigar);
1480 }
1481
1482 fn cigar_core<const EQ: bool>(&self, mut i: usize, mut j: usize, q: Option<&PaddedBytes>, r: Option<&PaddedBytes>, cigar: &mut Cigar) {
1483 assert!(i <= self.query_len && j <= self.reference_len, "Traceback cigar end position must be in bounds!");
1484 if EQ {
1485 assert!(q.is_some() && r.is_some());
1486 }
1487
1488 cigar.clear(i, j);
1489
1490 unsafe {
1491 let mut block_idx = self.block_idx;
1492 let mut trace_idx = self.trace_idx;
1493 let mut block_i;
1494 let mut block_j;
1495 let mut block_width;
1496 let mut block_height;
1497 let mut right;
1498
1499 #[derive(Copy, Clone, PartialEq, Debug)]
1500 enum Table {
1501 D = 0b00,
1502 C = 0b01,
1503 R = 0b10
1504 }
1505
1506 static OP_LUT: [[(Operation, usize, usize, Table); 64]; 2] = {
1509 let mut lut = [[(Operation::D, 0, 1, Table::D); 64]; 2];
1510
1511 let mut right = 0;
1519 while right < 2 {
1520 let mut trace = 0;
1521 while trace < 4 {
1522 let mut trace2 = 0;
1523 while trace2 < 4 {
1524 let mut table_idx = 0;
1525 while table_idx < 3 {
1526 let table = match table_idx {
1527 0b00 => Table::D,
1528 0b01 => Table::C,
1529 _ => Table::R
1530 };
1531
1532 let res = if right == 1 {
1533 match (trace, trace2, table) {
1534 (_, 0b00 | 0b10, Table::C) => (Operation::D, 0, 1, Table::C), (_, 0b01 | 0b11, Table::C) => (Operation::D, 0, 1, Table::D), (_, 0b00 | 0b01, Table::R) => (Operation::I, 1, 0, Table::R), (_, 0b10 | 0b11, Table::R) => (Operation::I, 1, 0, Table::D), (0b00, _, Table::D) => (Operation::M, 1, 1, Table::D), (0b01 | 0b11, 0b00 | 0b10, Table::D) => (Operation::D, 0, 1, Table::C), (0b01 | 0b11, 0b01 | 0b11, Table::D) => (Operation::D, 0, 1, Table::D), (0b10, 0b00 | 0b01, Table::D) => (Operation::I, 1, 0, Table::R), (0b10, 0b10 | 0b11, Table::D) => (Operation::I, 1, 0, Table::D), _ => (Operation::D, 0, 1, Table::D)
1544 }
1545 } else {
1546 match (trace, trace2, table) {
1548 (_, 0b00 | 0b10, Table::R) => (Operation::I, 1, 0, Table::R), (_, 0b01 | 0b11, Table::R) => (Operation::I, 1, 0, Table::D), (_, 0b00 | 0b01, Table::C) => (Operation::D, 0, 1, Table::C), (_, 0b10 | 0b11, Table::C) => (Operation::D, 0, 1, Table::D), (0b00, _, Table::D) => (Operation::M, 1, 1, Table::D), (0b01 | 0b11, 0b00 | 0b10, Table::D) => (Operation::I, 1, 0, Table::R), (0b01 | 0b11, 0b01 | 0b11, Table::D) => (Operation::I, 1, 0, Table::D), (0b10, 0b00 | 0b01, Table::D) => (Operation::D, 0, 1, Table::C), (0b10, 0b10 | 0b11, Table::D) => (Operation::D, 0, 1, Table::D), _ => (Operation::I, 1, 0, Table::D)
1558 }
1559 };
1560
1561 lut[right][(trace << 4) | (trace2 << 2) | (table as usize)] = res;
1562 table_idx += 1;
1563 }
1564 trace2 += 1;
1565 }
1566 trace += 1;
1567 }
1568 right += 1;
1569 }
1570
1571 lut
1572 };
1573
1574 let mut table = Table::D;
1575
1576 'outer: while i > 0 || j > 0 {
1577 loop {
1579 block_idx -= 1;
1580 block_i = *self.block_start.as_ptr().add(block_idx * 2) as usize;
1581 block_j = *self.block_start.as_ptr().add(block_idx * 2 + 1) as usize;
1582 block_height = *self.block_size.as_ptr().add(block_idx * 2) as usize;
1583 block_width = *self.block_size.as_ptr().add(block_idx * 2 + 1) as usize;
1584 trace_idx -= block_width * block_height / L;
1585
1586 if i >= block_i && j >= block_j {
1587 right = ((*self.right.as_ptr().add(block_idx / 64) >> (block_idx % 64)) & 0b1) as usize;
1588 break;
1589 }
1590 }
1591
1592 let lut = &*OP_LUT.as_ptr().add(right);
1594 if right > 0 {
1595 while i >= block_i && j >= block_j && (i > 0 || j > 0) {
1597 if self.free_query_start_gaps && i == 0 {
1598 break 'outer;
1600 }
1601
1602 let curr_i = i - block_i;
1603 let curr_j = j - block_j;
1604 let idx = trace_idx + curr_i / L + curr_j * (block_height / L);
1605
1606 if self.local_start && table == Table::D {
1607 let zero = ((*self.zero_mask.as_ptr().add(idx) >> ((curr_i % L) * 2)) & 0b1) > 0;
1609 if zero {
1610 break 'outer;
1611 }
1612 }
1613
1614 let t = ((*self.trace.as_ptr().add(idx) >> ((curr_i % L) * 2)) & 0b11) as usize;
1616 let t2 = ((*self.trace2.as_ptr().add(idx) >> ((curr_i % L) * 2)) & 0b11) as usize;
1617 let lut_idx = (t << 4) | (t2 << 2) | (table as usize);
1618 let lut_entry = &*lut.as_ptr().add(lut_idx);
1619
1620 let op = if EQ && lut_entry.0 == Operation::M {
1621 if q.unwrap_unchecked().get(i) == r.unwrap_unchecked().get(j) {
1622 Operation::Eq
1623 } else {
1624 Operation::X
1625 }
1626 } else {
1627 lut_entry.0
1628 };
1629 i -= lut_entry.1;
1630 j -= lut_entry.2;
1631 table = lut_entry.3;
1632 cigar.add(op);
1633 }
1634 } else {
1635 while i >= block_i && j >= block_j && (i > 0 || j > 0) {
1637 let curr_i = i - block_i;
1638 let curr_j = j - block_j;
1639 let idx = trace_idx + curr_j / L + curr_i * (block_width / L);
1640
1641 if self.local_start && table == Table::D {
1642 let zero = ((*self.zero_mask.as_ptr().add(idx) >> ((curr_j % L) * 2)) & 0b1) > 0;
1644 if zero {
1645 break 'outer;
1646 }
1647 }
1648
1649 let t = ((*self.trace.as_ptr().add(idx) >> ((curr_j % L) * 2)) & 0b11) as usize;
1651 let t2 = ((*self.trace2.as_ptr().add(idx) >> ((curr_j % L) * 2)) & 0b11) as usize;
1652 let lut_idx = (t << 4) | (t2 << 2) | (table as usize);
1653 let lut_entry = &*lut.as_ptr().add(lut_idx);
1654
1655 let op = if EQ && lut_entry.0 == Operation::M {
1656 if q.unwrap_unchecked().get(i) == r.unwrap_unchecked().get(j) {
1657 Operation::Eq
1658 } else {
1659 Operation::X
1660 }
1661 } else {
1662 lut_entry.0
1663 };
1664 i -= lut_entry.1;
1665 j -= lut_entry.2;
1666 table = lut_entry.3;
1667 cigar.add(op);
1668 }
1669 }
1670 }
1671 }
1672 }
1673
1674 pub fn blocks(&self) -> Vec<Rectangle> {
1677 let mut res = Vec::with_capacity(self.block_idx);
1678
1679 for i in 0..self.block_idx {
1680 unsafe {
1681 res.push(Rectangle {
1682 row: *self.block_start.as_ptr().add(i * 2) as usize,
1683 col: *self.block_start.as_ptr().add(i * 2 + 1) as usize,
1684 height: *self.block_size.as_ptr().add(i * 2) as usize,
1685 width: *self.block_size.as_ptr().add(i * 2 + 1) as usize
1686 });
1687 }
1688 }
1689
1690 res
1691 }
1692}
1693
1694#[derive(Copy, Clone, PartialEq, Debug)]
1696pub struct Rectangle {
1697 pub row: usize,
1698 pub col: usize,
1699 pub width: usize,
1700 pub height: usize
1701}
1702
1703#[inline]
1704fn clamp(x: i32) -> i16 {
1705 cmp::min(cmp::max(x, i16::MIN as i32), i16::MAX as i32) as i16
1706}
1707
1708#[inline]
1709fn div_ceil(n: usize, d: usize) -> usize {
1710 (n + d - 1) / d
1711}
1712
1713struct Aligned {
1715 layout: alloc::Layout,
1716 ptr: *const i16
1717}
1718
1719impl Aligned {
1720 pub unsafe fn new(block_size: usize) -> Self {
1721 let layout = alloc::Layout::from_size_align_unchecked(block_size * 2, L_BYTES);
1723 let ptr = alloc::alloc_zeroed(layout) as *const i16;
1724 Self { layout, ptr }
1725 }
1726
1727 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1728 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1729 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1730 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1731 pub unsafe fn clear(&mut self, block_size: usize) {
1732 let mut i = 0;
1733 while i < block_size {
1734 simd_store(self.ptr.add(i) as _, simd_set1_i16(MIN));
1735 i += L;
1736 }
1737 }
1738
1739 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1740 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1741 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1742 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1743 #[inline]
1744 pub unsafe fn set_vec(&mut self, o: &Aligned, idx: usize) {
1745 simd_store(self.ptr.add(idx) as _, simd_load(o.as_ptr().add(idx) as _));
1746 }
1747
1748 #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
1749 #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
1750 #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
1751 #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
1752 #[inline]
1753 pub unsafe fn copy_vec(&mut self, new_idx: usize, idx: usize) {
1754 simd_store(self.ptr.add(new_idx) as _, simd_load(self.ptr.add(idx) as _));
1755 }
1756
1757 #[inline]
1758 pub fn get(&self, i: usize) -> i16 {
1759 unsafe { *self.ptr.add(i) }
1760 }
1761
1762 #[allow(dead_code)]
1763 #[inline]
1764 pub fn set(&mut self, i: usize, v: i16) {
1765 unsafe { ptr::write(self.ptr.add(i) as _, v); }
1766 }
1767
1768 #[inline]
1769 pub fn as_mut_ptr(&mut self) -> *mut i16 {
1770 self.ptr as _
1771 }
1772
1773 #[inline]
1774 pub fn as_ptr(&self) -> *const i16 {
1775 self.ptr
1776 }
1777}
1778
1779impl Drop for Aligned {
1780 fn drop(&mut self) {
1781 unsafe { alloc::dealloc(self.ptr as _, self.layout); }
1782 }
1783}
1784
1785#[derive(Clone, PartialEq, Debug)]
1790pub struct PaddedBytes {
1791 s: Vec<u8>,
1792 len: usize
1793}
1794
1795impl PaddedBytes {
1796 pub fn new<M: Matrix>(len: usize, block_size: usize) -> Self {
1799 Self {
1800 s: vec![M::convert_char(M::NULL); 1 + len + block_size],
1801 len
1802 }
1803 }
1804
1805 pub fn set_bytes<M: Matrix>(&mut self, b: &[u8], block_size: usize) {
1807 self.s[0] = M::convert_char(M::NULL);
1808 self.s[1..1 + b.len()].copy_from_slice(b);
1809 self.s[1..1 + b.len()].iter_mut().for_each(|c| *c = M::convert_char(*c));
1810 self.s[1 + b.len()..1 + b.len() + block_size].fill(M::convert_char(M::NULL));
1811 self.len = b.len();
1812 }
1813
1814 pub fn set_bytes_rev<M: Matrix>(&mut self, b: &[u8], block_size: usize) {
1816 self.s[0] = M::convert_char(M::NULL);
1817 self.s[1..1 + b.len()].copy_from_slice(b);
1818 self.s[1..1 + b.len()].reverse();
1819 self.s[1..1 + b.len()].iter_mut().for_each(|c| *c = M::convert_char(*c));
1820 self.s[1 + b.len()..1 + b.len() + block_size].fill(M::convert_char(M::NULL));
1821 self.len = b.len();
1822 }
1823
1824 #[inline]
1829 pub fn from_bytes<M: Matrix>(b: &[u8], block_size: usize) -> Self {
1830 let mut v = b.to_owned();
1831 let len = v.len();
1832 v.insert(0, M::NULL);
1833 v.resize(v.len() + block_size, M::NULL);
1834 v.iter_mut().for_each(|c| *c = M::convert_char(*c));
1835 Self { s: v, len }
1836 }
1837
1838 #[inline]
1843 pub fn from_str<M: Matrix>(s: &str, block_size: usize) -> Self {
1844 Self::from_bytes::<M>(s.as_bytes(), block_size)
1845 }
1846
1847 #[inline]
1852 pub fn from_string<M: Matrix>(s: String, block_size: usize) -> Self {
1853 let mut v = s.into_bytes();
1854 let len = v.len();
1855 v.insert(0, M::NULL);
1856 v.resize(v.len() + block_size, M::NULL);
1857 v.iter_mut().for_each(|c| *c = M::convert_char(*c));
1858 Self { s: v, len }
1859 }
1860
1861 #[inline]
1863 pub unsafe fn get(&self, i: usize) -> u8 {
1864 *self.s.as_ptr().add(i)
1865 }
1866
1867 #[inline]
1869 pub unsafe fn set(&mut self, i: usize, c: u8) {
1870 *self.s.as_mut_ptr().add(i) = c;
1871 }
1872
1873 #[inline]
1875 pub unsafe fn as_ptr(&self, i: usize) -> *const u8 {
1876 self.s.as_ptr().add(i)
1877 }
1878
1879 #[inline]
1881 pub fn len(&self) -> usize {
1882 self.len
1883 }
1884}
1885
1886#[repr(C)]
1888#[derive(Copy, Clone, PartialEq, Debug)]
1889pub struct AlignResult {
1890 pub score: i32,
1891 pub query_idx: usize,
1892 pub reference_idx: usize
1893}
1894
1895#[derive(Copy, Clone, PartialEq, Debug)]
1896enum Direction {
1897 Right,
1898 Down,
1899 Grow
1900}
1901
1902#[cfg(test)]
1903mod tests {
1904 use crate::scores::*;
1905
1906 use super::*;
1907
1908 #[test]
1909 fn test_no_x_drop() {
1910 let test_gaps = Gaps { open: -11, extend: -1 };
1911
1912 let mut a = Block::<false, false>::new(100, 100, 16);
1913
1914 let r = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
1915 let q = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
1916 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1917 assert_eq!(a.res().score, 0);
1918
1919 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1920 let q = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
1921 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1922 assert_eq!(a.res().score, -14);
1923
1924 let r = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
1925 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1926 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1927 assert_eq!(a.res().score, -14);
1928
1929 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1930 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AARA", 16);
1931 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1932 assert_eq!(a.res().score, 11);
1933
1934 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAAAAAA", 16);
1935 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AARAAAA", 16);
1936 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1937 assert_eq!(a.res().score, 12);
1938
1939 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1940 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1941 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1942 assert_eq!(a.res().score, 16);
1943
1944 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1945 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AARA", 16);
1946 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1947 assert_eq!(a.res().score, 11);
1948
1949 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1950 let q = PaddedBytes::from_bytes::<AAMatrix>(b"RRRR", 16);
1951 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1952 assert_eq!(a.res().score, -4);
1953
1954 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
1955 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAA", 16);
1956 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
1957 assert_eq!(a.res().score, 1);
1958
1959 let test_gaps2 = Gaps { open: -2, extend: -1 };
1960
1961 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAN", 16);
1962 let q = PaddedBytes::from_bytes::<NucMatrix>(b"ATAA", 16);
1963 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
1964 assert_eq!(a.res().score, 0);
1965
1966 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
1967 let q = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
1968 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
1969 assert_eq!(a.res().score, 32);
1970
1971 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
1972 let q = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", 16);
1973 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
1974 assert_eq!(a.res().score, -32);
1975
1976 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
1977 let q = PaddedBytes::from_bytes::<NucMatrix>(b"TATATATATATATATATATATATATATATATA", 16);
1978 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
1979 assert_eq!(a.res().score, 0);
1980
1981 let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTAAAAAAATTTTTTTTTTTT", 16);
1982 let q = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
1983 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
1984 assert_eq!(a.res().score, 7);
1985
1986 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAA", 16);
1987 let q = PaddedBytes::from_bytes::<NucMatrix>(b"C", 16);
1988 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
1989 assert_eq!(a.res().score, -5);
1990 a.align(&r, &q, &NW1, test_gaps2, 16..=16, 0);
1991 assert_eq!(a.res().score, -5);
1992 }
1993
1994 #[test]
1995 fn test_x_drop() {
1996 let test_gaps = Gaps { open: -11, extend: -1 };
1997
1998 let mut a = Block::<false, true>::new(100, 100, 16);
1999
2000 let r = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2001 let q = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2002 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2003 assert_eq!(a.res(), AlignResult { score: 0, query_idx: 0, reference_idx: 0 });
2004
2005 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2006 let q = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2007 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2008 assert_eq!(a.res(), AlignResult { score: 0, query_idx: 0, reference_idx: 0 });
2009
2010 let r = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2011 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2012 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2013 assert_eq!(a.res(), AlignResult { score: 0, query_idx: 0, reference_idx: 0 });
2014
2015 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAARRA", 16);
2016 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAAAA", 16);
2017 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2018 assert_eq!(a.res(), AlignResult { score: 14, query_idx: 6, reference_idx: 6 });
2019
2020 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAAAAAAAAAAAAARRRRRRRRRRRRRRRRAAAAAAAAAAAAA", 16);
2021 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
2022 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2023 assert_eq!(a.res(), AlignResult { score: 60, query_idx: 15, reference_idx: 15 });
2024
2025 let mut a = Block::<true, true>::new(2048, 2048, 2048);
2026 let long_str = std::iter::repeat(b'A').take(2048).collect::<Vec<_>>();
2027 let r = PaddedBytes::from_bytes::<AAMatrix>(&long_str, 2048);
2028 let q = PaddedBytes::from_bytes::<AAMatrix>(&long_str, 2048);
2029 a.align(&q, &r, &BLOSUM62, test_gaps, 2048..=2048, 100);
2030 assert_eq!(a.res(), AlignResult { score: 8192, query_idx: 2048, reference_idx: 2048 });
2031
2032 let mut a = Block::<true, true>::new(0, 0, 16);
2033
2034 let r = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2035 let q = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2036 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2037 assert_eq!(a.res(), AlignResult { score: 0, query_idx: 0, reference_idx: 0 });
2038
2039 let mut a = Block::<true, true>::new(4, 4, 16);
2040
2041 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2042 let q = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2043 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2044 assert_eq!(a.res(), AlignResult { score: 0, query_idx: 0, reference_idx: 0 });
2045
2046 let r = PaddedBytes::from_bytes::<AAMatrix>(b"", 16);
2047 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2048 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
2049 assert_eq!(a.res(), AlignResult { score: 0, query_idx: 0, reference_idx: 0 });
2050 }
2051
2052 #[test]
2053 fn test_trace() {
2054 let test_gaps = Gaps { open: -11, extend: -1 };
2055
2056 let mut cigar = Cigar::new(100, 100);
2057
2058 let mut a = Block::<true, false>::new(100, 100, 16);
2059
2060 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAARRA", 16);
2061 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAAAA", 16);
2062 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
2063 let res = a.res();
2064 assert_eq!(res, AlignResult { score: 14, query_idx: 6, reference_idx: 6 });
2065 a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2066 assert_eq!(cigar.to_string(), "3=2X1=");
2067
2068 let r = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2069 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAA", 16);
2070 a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
2071 let res = a.res();
2072 assert_eq!(res, AlignResult { score: 1, query_idx: 3, reference_idx: 4 });
2073 a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
2074 assert_eq!(cigar.to_string(), "3M1D");
2075
2076 let test_gaps2 = Gaps { open: -2, extend: -1 };
2077
2078 let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTAAAAAAATTTTTTTTTTTT", 16);
2079 let q = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
2080 a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
2081 let res = a.res();
2082 assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 });
2083 a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
2084 assert_eq!(cigar.to_string(), "2M6I16M3D");
2085
2086 let mut a = Block::<true, false>::new(100, 100, 32);
2087 let q = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAAAAATTGCGCT", 32);
2088 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAAAAAGCGC", 32);
2089
2090 a.align(&q, &r, &NW1, test_gaps2, 32..=32, 0);
2091 let res = a.res();
2092 assert_eq!(res, AlignResult { score: 8, query_idx: 16, reference_idx: 13 });
2093 a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2094 assert_eq!(cigar.to_string(), "9=2I4=1I");
2095
2096 let matrix = NucMatrix::new_simple(2, -1);
2097 let test_gaps3 = Gaps { open: -5, extend: -2 };
2098 a.align(&q, &r, &matrix, test_gaps3, 32..=32, 0);
2099 let res = a.res();
2100 assert_eq!(res, AlignResult { score: 14, query_idx: 16, reference_idx: 13 });
2101 a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2102 assert_eq!(cigar.to_string(), "9=2I4=1I");
2103 }
2104
2105 #[test]
2106 fn test_bytes() {
2107 let test_gaps = Gaps { open: -2, extend: -1 };
2108
2109 let mut a = Block::<false, false>::new(100, 100, 16);
2110
2111 let r = PaddedBytes::from_bytes::<ByteMatrix>(b"AAAaaA", 16);
2112 let q = PaddedBytes::from_bytes::<ByteMatrix>(b"AAAAAA", 16);
2113 a.align(&q, &r, &BYTES1, test_gaps, 16..=16, 0);
2114 assert_eq!(a.res().score, 2);
2115
2116 let r = PaddedBytes::from_bytes::<ByteMatrix>(b"abcdefg", 16);
2117 let q = PaddedBytes::from_bytes::<ByteMatrix>(b"abdefg", 16);
2118 a.align(&q, &r, &BYTES1, test_gaps, 16..=16, 0);
2119 assert_eq!(a.res().score, 4);
2120 }
2121
2122 #[test]
2123 fn test_profile() {
2124 let mut a = Block::<false, false>::new(100, 100, 16);
2125 let r = AAProfile::from_bytes(b"AAAA", 16, 1, -1, -1, 0, -1, -1);
2126 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2127 a.align_profile(&q, &r, 16..=16, 0);
2128 assert_eq!(a.res().score, 4);
2129
2130 let r = AAProfile::from_bytes(b"AATTAA", 16, 1, -1, -1, 0, -1, -1);
2131 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2132 a.align_profile(&q, &r, 16..=16, 0);
2133 assert_eq!(a.res().score, 1);
2134
2135 let r = AAProfile::from_bytes(b"AATTAA", 16, 1, -1, -1, -1, -1, -1);
2136 let q = PaddedBytes::from_bytes::<AAMatrix>(b"AAAA", 16);
2137 a.align_profile(&q, &r, 16..=16, 0);
2138 assert_eq!(a.res().score, 0);
2139
2140 let mut a = Block::<true, false>::new(100, 100, 16);
2141 let mut cigar = Cigar::new(100, 100);
2142
2143 let r = AAProfile::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 16, 1, -1, -1, 0, -1, -1);
2144 let q = PaddedBytes::from_bytes::<AAMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
2145 a.align_profile(&q, &r, 16..=16, 0);
2146 let res = a.res();
2147 assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 });
2148 a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
2149 assert_eq!(cigar.to_string(), "2M6I16M3D");
2150
2151 let r = AAProfile::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 16, 1, -1, -1, -1, -1, -1);
2152 let q = PaddedBytes::from_bytes::<AAMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
2153 a.align_profile(&q, &r, 16..=16, 0);
2154 let res = a.res();
2155 assert_eq!(res, AlignResult { score: 6, query_idx: 24, reference_idx: 21 });
2156 a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
2157 assert_eq!(cigar.to_string(), "2M6I16M3D");
2158
2159 let mut r = AAProfile::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 16, 1, -1, -2, -1, -1, -1);
2160 r.set_gap_close_C(17, -1);
2161 r.set_gap_close_C(19, 0);
2162 let q = PaddedBytes::from_bytes::<AAMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
2163 a.align_profile(&q, &r, 16..=16, 0);
2164 let res = a.res();
2165 assert_eq!(res, AlignResult { score: 6, query_idx: 24, reference_idx: 21 });
2166 a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
2167 assert_eq!(cigar.to_string(), "2M6I14M3D2M");
2168 }
2169
2170 #[test]
2171 fn test_local_and_free_query_gaps() {
2172 let test_gaps = Gaps { open: -2, extend: -1 };
2173
2174 let mut local = Block::<true, false, true, false>::new(100, 100, 32);
2175 let mut cigar = Cigar::new(100, 100);
2176
2177 let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTAAAAAA", 32);
2178 let q = PaddedBytes::from_bytes::<NucMatrix>(b"CCCCCCCCCCAAAAAA", 32);
2179 local.align(&q, &r, &NW1, test_gaps, 32..=32, 0);
2180 let res = local.res();
2181 assert_eq!(res, AlignResult { score: 6, query_idx: 16, reference_idx: 10 });
2182 local.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2183 assert_eq!(cigar.to_string(), "6=");
2184
2185 let mut local = Block::<true, true, true, false>::new(100, 100, 32);
2186
2187 let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTAAAAAATTTTTTT", 32);
2188 let q = PaddedBytes::from_bytes::<NucMatrix>(b"CCCCCCCCCCAAAAAACCCCCCCCCCCC", 32);
2189 local.align(&q, &r, &NW1, test_gaps, 32..=32, 100);
2190 let res = local.res();
2191 assert_eq!(res, AlignResult { score: 6, query_idx: 16, reference_idx: 10 });
2192 local.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2193 assert_eq!(cigar.to_string(), "6=");
2194
2195 let mut q_start = Block::<true, false, false, true>::new(100, 100, 32);
2196
2197 let r = PaddedBytes::from_bytes::<NucMatrix>(b"CCCCCCCCCCAAAAAA", 32);
2198 let q = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAA", 32);
2199 q_start.align(&q, &r, &NW1, test_gaps, 32..=32, 0);
2200 let res = q_start.res();
2201 assert_eq!(res, AlignResult { score: 6, query_idx: 6, reference_idx: 16 });
2202 q_start.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2203 assert_eq!(cigar.to_string(), "6=");
2204
2205 let r = PaddedBytes::from_bytes::<NucMatrix>(b"CCCCCCCCCCAAATAA", 32);
2206 let q = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAA", 32);
2207 q_start.align(&q, &r, &NW1, test_gaps, 32..=32, 0);
2208 let res = q_start.res();
2209 assert_eq!(res, AlignResult { score: 4, query_idx: 6, reference_idx: 16 });
2210 q_start.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2211 assert_eq!(cigar.to_string(), "3=1X2=");
2212
2213 let mut q_end = Block::<true, false, false, false, true>::new(100, 100, 32);
2214
2215 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAACCCCCCCCCC", 32);
2216 let q = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAA", 32);
2217 q_end.align(&q, &r, &NW1, test_gaps, 32..=32, 0);
2218 let res = q_end.res();
2219 assert_eq!(res, AlignResult { score: 6, query_idx: 6, reference_idx: 6 });
2220 q_end.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2221 assert_eq!(cigar.to_string(), "6=");
2222
2223 let r = PaddedBytes::from_bytes::<NucMatrix>(b"AAATAACCCCCCCCCC", 32);
2224 let q = PaddedBytes::from_bytes::<NucMatrix>(b"AAAAAA", 32);
2225 q_end.align(&q, &r, &NW1, test_gaps, 32..=32, 0);
2226 let res = q_end.res();
2227 assert_eq!(res, AlignResult { score: 4, query_idx: 6, reference_idx: 6 });
2228 q_end.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
2229 assert_eq!(cigar.to_string(), "3=1X2=");
2230 }
2231}