block_aligner/lib.rs
1//! SIMD-accelerated library for computing global and X-drop affine
2//! gap penalty sequence-to-sequence or sequence-to-profile alignments
3//! using an adaptive block-based algorithm.
4//!
5//! Currently, SSE2, AVX2, Neon, and WASM SIMD are supported.
6//!
7//! ## Example
8//! ```
9//! use block_aligner::{cigar::*, scan_block::*, scores::*};
10//!
11//! let min_block_size = 32;
12//! let max_block_size = 256;
13//!
14//! // A gap of length n will cost: open + extend * (n - 1)
15//! let gaps = Gaps { open: -2, extend: -1 };
16//!
17//! // Note that PaddedBytes, Block, and Cigar can be initialized with sequence length
18//! // and block size upper bounds and be reused later for shorter sequences, to avoid
19//! // repeated allocations.
20//! let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTAAAAAAATTTTTTTTTTTT", max_block_size);
21//! let q = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", max_block_size);
22//!
23//! // Align with traceback, but no X-drop threshold (global alignment).
24//! let mut a = Block::<true, false>::new(q.len(), r.len(), max_block_size);
25//! a.align(&q, &r, &NW1, gaps, min_block_size..=max_block_size, 0);
26//! let res = a.res();
27//!
28//! assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 });
29//!
30//! let mut cigar = Cigar::new(res.query_idx, res.reference_idx);
31//! // Compute traceback and resolve =/X (matches/mismatches).
32//! a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
33//!
34//! assert_eq!(cigar.to_string(), "2=6I16=3D");
35//! ```
36//!
37//! ## Tuning block sizes
38//!
39//! For long, noisy Nanopore reads, a min block size of ~1% sequence length and a max block size
40//! of ~10% sequence length performs well (tested with reads up to ~50kbps).
41//! For proteins, a min block size of 32 and a max block size of 256 performs well.
42//! Using a minimum block size that is at least 32 is recommended for most applications.
43//! Using a maximum block size greater than `2^14 = 16384` is not recommended.
44//! If the alignment scores are saturating (score too large), then use a smaller block size.
45//! Let me know how block aligner performs on your data!
46//!
47//! When building your code that uses this library, it is important to specify the
48//! correct feature flags: `simd_sse2`, `simd_avx2`, `simd_neon`, or `simd_wasm`.
49//! More information on specifying different features for different platforms
50//! with the same dependency [here](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#platform-specific-dependencies).
51
52// special SIMD instruction set modules adapted for this library
53// their types and lengths are abstracted out
54
55#[cfg(feature = "simd_sse2")]
56#[macro_use]
57#[doc(hidden)]
58/// cbindgen:ignore
59pub mod sse2;
60
61#[cfg(feature = "simd_sse2")]
62pub use sse2::L;
63
64#[cfg(feature = "simd_avx2")]
65#[macro_use]
66#[doc(hidden)]
67/// cbindgen:ignore
68pub mod avx2;
69
70#[cfg(feature = "simd_avx2")]
71pub use avx2::L;
72
73#[cfg(feature = "simd_wasm")]
74#[macro_use]
75#[doc(hidden)]
76/// cbindgen:ignore
77pub mod simd128;
78
79#[cfg(feature = "simd_wasm")]
80pub use simd128::L;
81
82#[cfg(feature = "simd_neon")]
83#[macro_use]
84#[doc(hidden)]
85/// cbindgen:ignore
86pub mod neon;
87
88#[cfg(feature = "simd_neon")]
89pub use neon::L;
90
91#[cfg(any(feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))]
92pub mod scan_block;
93#[cfg(any(feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))]
94pub mod scores;
95#[cfg(any(feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))]
96pub mod cigar;
97
98#[cfg(any(feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))]
99#[doc(hidden)]
100pub mod ffi;
101
102#[cfg(not(any(feature = "no_simd", feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon")))]
103compile_error!("No SIMD feature flag specified! Specify \"no_simd\" to disable all SIMD features.");
104
105/// Calculate the percentage of a length, rounded to the next power of two.
106///
107/// This is useful for computing the min and max block sizes for sequences of a certain
108/// length by using percentages. The returned value is at least 32 and at most 16384.
109pub fn percent_len(len: usize, p: f32) -> usize {
110 ((p * (len as f32)).round() as usize).max(32).next_power_of_two().min(1 << 14)
111}