biodiff_align/
wfa2.rs

1use serde::{Deserialize, Serialize};
2
/// Whether this crate was compiled with the `wfa2` feature enabled.
pub const WFA2_AVAILABLE: bool = cfg!(feature = "wfa2");
7
8/// The WFA2 aligner, without any extra options.
9#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]
10pub struct Wfa2;
11
12#[cfg(feature = "wfa2")]
13mod implemented {
14    use biodiff_wfa2_sys::*;
15    use std::{
16        ffi::{c_char, c_int},
17        marker::PhantomData,
18    };
19
20    use crate::{Align, AlignAlgorithm, AlignMode, CheckStatus, InternalMode, Op};
21
22    use super::Wfa2;
23    fn settings(
24        algo: &AlignAlgorithm,
25        mode: InternalMode,
26        text_len: usize,
27    ) -> wavefront_aligner_attr_t {
28        let mut attributes = unsafe { wavefront_aligner_attr_default };
29        attributes.heuristic.strategy = wf_heuristic_strategy_wf_heuristic_none;
30        attributes.alignment_scope = alignment_scope_t_compute_alignment;
31
32        attributes.distance_metric = distance_metric_t_gap_affine;
33        attributes.affine_penalties.gap_opening = -algo.gap_open as c_int;
34        attributes.affine_penalties.gap_extension = -algo.gap_extend as c_int;
35        attributes.affine_penalties.mismatch = -algo.mismatch_score as c_int;
36        attributes.affine_penalties.match_ = -algo.match_score as c_int;
37
38        match mode {
39            InternalMode::Global => {
40                attributes.alignment_form.span = alignment_span_t_alignment_end2end;
41                attributes.memory_mode = wavefront_memory_t_wavefront_memory_ultralow;
42            }
43            InternalMode::Semiglobal => {
44                attributes.alignment_form.span = alignment_span_t_alignment_endsfree;
45                attributes.alignment_form.pattern_begin_free = 0;
46                attributes.alignment_form.pattern_end_free = 0;
47                attributes.alignment_form.text_begin_free = text_len as c_int;
48                attributes.alignment_form.text_end_free = text_len as c_int;
49                // wfa2 currently only supports global alignment for ultra low memory mode
50                attributes.memory_mode = wavefront_memory_t_wavefront_memory_high;
51            }
52        }
53
54        attributes
55    }
56
57    pub struct Aligner<'a>(*mut wavefront_aligner_t, PhantomData<&'a ()>);
58
59    impl<'a> Aligner<'a> {
60        fn new(settings: &mut wavefront_aligner_attr_t, x: &[u8], y: &[u8]) -> Self {
61            let aligner = unsafe { wavefront_aligner_new(settings) };
62            if aligner.is_null() {
63                panic!("could not create aligner");
64            }
65            unsafe {
66                wavefront_align(
67                    aligner,
68                    x.as_ptr() as *const c_char,
69                    x.len() as c_int,
70                    y.as_ptr() as *const c_char,
71                    y.len() as c_int,
72                )
73            };
74            Self(aligner, PhantomData)
75        }
76        unsafe fn ops(&self) -> &[u8] {
77            let cigar = (*self.0).cigar.as_ref().unwrap();
78            let slice = cigar_op_slice(cigar);
79            slice
80        }
81    }
82
83    impl<'a> Drop for Aligner<'a> {
84        fn drop(&mut self) {
85            unsafe { wavefront_aligner_delete(self.0) };
86        }
87    }
88
89    unsafe fn cigar_op_slice(cigar: &cigar_t) -> &[u8] {
90        let begin_ptr = (cigar.operations as *const u8).offset(cigar.begin_offset as isize);
91        let len = cigar.end_offset - cigar.begin_offset;
92        std::slice::from_raw_parts(begin_ptr, len as usize)
93    }
94
95    const SIZE_LIMIT: u64 = 1 << 30;
96
97    impl Align for Wfa2 {
98        fn align(&self, algo: &AlignAlgorithm, mode: InternalMode, x: &[u8], y: &[u8]) -> Vec<Op> {
99            let mut align_attr = settings(algo, mode, y.len());
100            let aligner = Aligner::new(&mut align_attr, x, y);
101            let mut ret = vec![];
102            let slice = unsafe { aligner.ops() };
103            for &c in slice {
104                match c {
105                    b'M' => ret.push(Op::Match),
106                    b'I' => ret.push(Op::Del),
107                    b'D' => ret.push(Op::Ins),
108                    b'X' => ret.push(Op::Subst),
109                    _ => panic!("unknown cigar operation: {c:x}"),
110                }
111            }
112            ret
113        }
114
115        fn check_params(
116            &self,
117            algo: &AlignAlgorithm,
118            mode: InternalMode,
119            x_size: usize,
120            y_size: usize,
121        ) -> CheckStatus {
122            let mut errors = String::new();
123            if algo.mode == AlignMode::Semiglobal && algo.match_score != 0 {
124                errors.push_str(
125                    "WFA2 does not support semiglobal alignment with non-zero match score\n",
126                );
127            }
128            if algo.mismatch_score >= 0 {
129                errors.push_str("WFA2 mismatch score must be negative\n");
130            }
131            if algo.gap_extend == 0 {
132                errors.push_str("WFA2 gap extend score must not be zero\n");
133            }
134            if !errors.is_empty() {
135                if errors.ends_with('\n') {
136                    errors.pop();
137                }
138                return CheckStatus::Error(errors);
139            }
140            // for global alignment, we use biwfa, but we use regular wfa which uses quadratic memory
141            if matches!(mode, InternalMode::Semiglobal)
142                && x_size as u64 * y_size as u64 > SIZE_LIMIT
143            {
144                return CheckStatus::MemoryWarning;
145            }
146            CheckStatus::Ok
147        }
148    }
149}
150
151#[cfg(not(feature = "wfa2"))]
152mod unimplemented {
153    use crate::{Align, CheckStatus, Op};
154
155    impl Align for super::Wfa2 {
156        fn align(
157            &self,
158            _algo: &crate::AlignAlgorithm,
159            _mode: crate::InternalMode,
160            _x: &[u8],
161            _y: &[u8],
162        ) -> Vec<Op> {
163            unimplemented!()
164        }
165
166        fn check_params(
167            &self,
168            _algo: &crate::AlignAlgorithm,
169            _mode: crate::InternalMode,
170            _x_size: usize,
171            _y_size: usize,
172        ) -> CheckStatus {
173            return CheckStatus::Error(String::from(
174                "WFA2 is not available. Please recompile with the\n\
175                `wfa2` feature enabled or choose another algorithm.",
176            ));
177        }
178    }
179}