1pub mod rustbio;
16pub mod wfa2;
17use std::ops::Range;
18use std::sync::Arc;
19
20use serde::{Deserialize, Serialize};
21
22pub use self::rustbio::Banded;
23use self::rustbio::RustBio;
24use self::wfa2::Wfa2;
25
26#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
27enum Op {
28 Match,
29 Subst,
30 Ins,
31 Del,
32 Xclip(usize),
33 Yclip(usize),
34}
35
36fn right_biased_binary_search<F, T, R>(arr: &[R], key: &T, cmp: F) -> Result<usize, usize>
37where
38 F: Fn(&T, &R) -> std::cmp::Ordering,
39{
40 let mut search_range = 0..arr.len();
41 let mut eq_idx = None;
42 while search_range.end - search_range.start > 0 {
43 let middle_index = (search_range.start + search_range.end) >> 1;
44 match cmp(key, &arr[middle_index]) {
45 std::cmp::Ordering::Less => search_range.end = middle_index,
46 std::cmp::Ordering::Equal => {
47 eq_idx = Some(middle_index);
48 search_range.start = middle_index + 1;
49 }
50 std::cmp::Ordering::Greater => search_range.start = middle_index + 1,
51 }
52 }
53 eq_idx.ok_or(search_range.start)
54}
55
56#[cfg(feature = "wfa2")]
57pub const DEFAULT_BLOCKSIZE: usize = 32768;
58#[cfg(not(feature = "wfa2"))]
59pub const DEFAULT_BLOCKSIZE: usize = 8192;
60
61fn as_byte_arrays<FileContent: AsRef<[u8]>>(files: &[Arc<FileContent>; 2]) -> [&[u8]; 2] {
62 [(*files[0]).as_ref(), (*files[1]).as_ref()]
63}
64
65#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
71pub enum AlignMode {
72 Global,
74 Semiglobal,
77 Blockwise(usize),
80}
81
82#[derive(Clone, Copy, Debug)]
83enum InternalMode {
84 Global,
85 Semiglobal,
86}
87
88#[derive(Clone, Copy, Debug, PartialEq, Eq)]
89pub enum AlgorithmKind {
90 Global,
92 Semiglobal,
95}
96
97impl From<AlignMode> for InternalMode {
98 fn from(value: AlignMode) -> Self {
99 match value {
100 AlignMode::Global | AlignMode::Blockwise(_) => InternalMode::Global,
101 AlignMode::Semiglobal => InternalMode::Semiglobal,
102 }
103 }
104}
105
106pub enum CheckStatus {
107 Ok,
109 MemoryWarning,
111 #[allow(dead_code)]
113 Error(String),
114}
115
116trait Align {
117 fn align(&self, algo: &AlignAlgorithm, mode: InternalMode, x: &[u8], y: &[u8]) -> Vec<Op>;
118 fn check_params(
119 &self,
120 algo: &AlignAlgorithm,
121 mode: InternalMode,
122 x_size: usize,
123 y_size: usize,
124 ) -> CheckStatus;
125}
126
127#[derive(Clone, Debug, Serialize, Deserialize)]
129#[serde(tag = "backend")]
130#[non_exhaustive]
131pub enum AlignBackend {
132 #[serde(rename = "rustbio")]
134 RustBio(RustBio),
135 #[serde(rename = "wfa2")]
138 Wfa2(Wfa2),
139}
140
141impl AlignBackend {
142 fn aligner(&self) -> &dyn Align {
143 match self {
144 AlignBackend::RustBio(r) => r,
145 AlignBackend::Wfa2(w) => w,
146 }
147 }
148}
149
150#[derive(Clone, Debug, Serialize, Deserialize)]
152#[serde(default)]
153pub struct AlignAlgorithm {
154 pub name: String,
156 pub gap_open: i32,
158 pub gap_extend: i32,
160 pub mismatch_score: i32,
162 pub match_score: i32,
164 pub mode: AlignMode,
166 pub backend: AlignBackend,
168}
169
170impl Default for AlignAlgorithm {
171 fn default() -> Self {
172 #[cfg(feature = "wfa2")]
173 let backend = AlignBackend::Wfa2(Wfa2);
174 #[cfg(not(feature = "wfa2"))]
175 let backend = AlignBackend::RustBio(RustBio::default());
176 AlignAlgorithm {
177 name: "default".to_string(),
178 gap_open: -8,
179 gap_extend: -1,
180 mismatch_score: -1,
181 match_score: 0,
182 mode: AlignMode::Blockwise(DEFAULT_BLOCKSIZE),
183 backend,
184 }
185 }
186}
187
188#[derive(Clone, Debug)]
190pub struct AlignInfo {
191 pub global: AlignAlgorithm,
192 pub semiglobal: AlignAlgorithm,
193}
194
195#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
196enum Side {
197 First,
198 Second,
199}
200
201impl Side {
202 pub fn as_index(self) -> usize {
203 match self {
204 Side::First => 0,
205 Side::Second => 1,
206 }
207 }
208 pub fn other(self) -> Side {
209 match self {
210 Side::First => Side::Second,
211 Side::Second => Side::First,
212 }
213 }
214}
215
216impl AlignInfo {
217 pub fn check_parameters(
221 &self,
222 filesizes: [usize; 2],
223 selection: [Option<Range<usize>>; 2],
224 ) -> CheckStatus {
225 match selection.clone() {
226 [Some(x), None] | [None, Some(x)] if !x.is_empty() => {
228 let right = selection[1].is_some();
229 let [x, y] = filesizes;
230 let [y_size, x_size] = if right { [x, y] } else { [y, x] };
231 let res = self.semiglobal.check_parameters([x_size, y_size]);
232 if !matches!(res, CheckStatus::Ok) {
233 return res;
234 }
235 }
236 _ => {}
237 };
238 let [x_size, y_size] = if let AlignMode::Blockwise(size) = self.global.mode {
239 [size, size]
240 } else {
241 filesizes
242 };
243 self.global.check_parameters([x_size, y_size])
244 }
245
246 pub fn start_align_with_selection<FileContent: AsRef<[u8]> + Send + Sync + 'static>(
254 &self,
255 files: [Arc<FileContent>; 2],
256 selection: [Option<Range<usize>>; 2],
257 addr: [usize; 2],
258 sender: impl (FnMut(AlignedMessage) -> bool) + Clone + Send + 'static,
259 ) {
260 let (selected, right, end) = match selection.clone() {
261 [Some(x), None] | [None, Some(x)] if !x.is_empty() => {
264 let right = selection[1].is_some();
265 let side = if right { Side::Second } else { Side::First };
266 (x.clone(), side, addr[right as usize] != x.start)
267 }
268 _ => {
269 return self.global.start_align(files, addr, sender);
271 }
272 };
273 let algo = self.clone();
274 std::thread::spawn(move || {
275 algo.align_with_selection(files, (selected, right), end, sender)
276 });
277 }
278
279 fn align_with_selection<FileContent: AsRef<[u8]> + Send + Sync>(
284 &self,
285 files: [Arc<FileContent>; 2],
286 selection: (Range<usize>, Side),
287 end: bool,
288 mut sender: impl (FnMut(AlignedMessage) -> bool) + Clone + Send,
289 ) {
290 let (select, side) = selection;
291 let full_pattern = (*files[side.as_index()]).as_ref();
292 let pattern = &(*files[side.as_index()]).as_ref()[select.clone()];
293 let text = &(*files[side.other().as_index()]).as_ref()[..];
294 let alignment = self
295 .semiglobal
296 .align([pattern, text], InternalMode::Semiglobal);
297
298 let (alignment, textaddr) = ops_pattern_subrange(&alignment);
299 let (mut array, pattern_end, text_end) =
300 AlignElement::from_array(alignment, full_pattern, text, select.start, textaddr);
301 let (start_addr, end_addr) = match side {
302 Side::First => ([select.start, textaddr], [pattern_end, text_end]),
303 Side::Second => {
304 array.iter_mut().for_each(|x| *x = x.mirror());
305 ([textaddr, select.start], [text_end, pattern_end])
306 }
307 };
308 let (prepend, append) = if end {
309 let ap = array.pop().into_iter().collect();
310 (array, ap)
311 } else {
312 (Vec::new(), array)
313 };
314 if !sender(AlignedMessage::Append(append)) {
315 return;
316 }
317 if !sender(AlignedMessage::Prepend(prepend)) {
318 return;
319 }
320 let files2 = files.clone();
321 let sender2 = sender.clone();
322 let algo = self.global.clone();
323 std::thread::scope(|s| {
324 s.spawn(move || {
325 algo.align_end(as_byte_arrays(&files2), end_addr, sender2);
326 });
327 self.global
328 .align_front(as_byte_arrays(&files), start_addr, sender);
329 });
330 }
331}
332
333impl AlignAlgorithm {
334 pub fn check_parameters(&self, filesizes: [usize; 2]) -> CheckStatus {
338 let mut errors = String::new();
339 if self.name.is_empty() {
340 errors.push_str("name is invalid: must not be empty\n");
341 }
342 if self.gap_open > 0 {
343 errors.push_str("gap open is invalid: must not be positive\n");
344 }
345 if self.gap_extend > 0 {
346 errors.push_str("gap extend is invalid: must not be positive\n");
347 }
348 if !errors.is_empty() {
349 if errors.ends_with('\n') {
350 errors.pop();
351 }
352 return CheckStatus::Error(errors);
353 }
354 let [x_size, y_size] = if let AlignMode::Blockwise(size) = self.mode {
355 [size, size]
356 } else {
357 filesizes
358 };
359 self.backend
360 .aligner()
361 .check_params(self, self.mode.into(), x_size, y_size)
362 }
363
364 pub fn default_semiglobal() -> Self {
367 AlignAlgorithm {
368 mode: AlignMode::Semiglobal,
369 ..Default::default()
370 }
371 }
372
373 pub fn align_whole<FileContent: AsRef<[u8]>>(
375 &self,
376 files: [Arc<FileContent>; 2],
377 ) -> Vec<AlignElement> {
378 let [x, y] = as_byte_arrays(&files);
379 let alignment = self.align([x, y], InternalMode::Global);
380 AlignElement::from_array(&alignment, x.as_ref(), y.as_ref(), 0, 0).0
381 }
382 pub fn start_align<FileContent: AsRef<[u8]> + Send + Sync + 'static>(
387 &self,
388 files: [Arc<FileContent>; 2],
389 addr: [usize; 2],
390 mut sender: impl (FnMut(AlignedMessage) -> bool) + Clone + Send + 'static,
391 ) {
392 let algo = self.clone();
393 match self.mode {
394 AlignMode::Global => {
395 std::thread::spawn(move || {
396 let res = algo.align_whole(files);
397 sender(AlignedMessage::Initial(res, addr));
398 });
399 }
400 AlignMode::Blockwise(_) => {
401 std::thread::spawn(move || algo.align_initial_block(files, addr, sender));
402 }
403 AlignMode::Semiglobal => panic!("Semiglobal alignment is not supported here"),
404 }
405 }
406
407 fn align(&self, [x, y]: [&[u8]; 2], mode: InternalMode) -> Vec<Op> {
408 if x[..] == y[..] {
409 return vec![Op::Match; x.len()];
410 }
411 self.backend.aligner().align(self, mode, x, y)
412 }
413
414 fn block_size(&self, files: [&[u8]; 2]) -> usize {
415 if let AlignMode::Blockwise(size) = self.mode {
416 size
417 } else {
418 let [x, y] = files;
419 x.len().max(y.len())
420 }
421 }
422
423 pub fn align_initial_block<FileContent: AsRef<[u8]> + Send + Sync>(
428 &self,
429 files: [Arc<FileContent>; 2],
430 addresses: [usize; 2],
431 mut sender: impl (FnMut(AlignedMessage) -> bool) + Clone + Send,
432 ) {
433 let [xaddr, yaddr] = addresses;
434 let [x, y] = as_byte_arrays(&files);
435 let block_size = self.block_size(as_byte_arrays(&files));
436 let before = (block_size / 2).min(xaddr).min(yaddr);
439 let before_deficit = block_size / 2 - before;
442 let after = ((block_size + 1) / 2 + before_deficit)
443 .min(x.len() - xaddr)
444 .min(y.len() - yaddr);
445 let x_block = &x[xaddr - before..xaddr + after];
446 let y_block = &y[yaddr - before..yaddr + after];
447 let aligned = self.align([x_block, y_block], self.mode.into());
448 let (aligned_ops, xaddr_end, yaddr_end) =
449 AlignElement::from_array(&aligned, x, y, xaddr - before, yaddr - before);
450 let (Ok(xcursor) | Err(xcursor)) =
451 right_biased_binary_search(&aligned_ops, &xaddr, |lhs, rhs| lhs.cmp(&rhs.xaddr));
452 let xcursor = xcursor as usize;
453 let (Ok(ycursor) | Err(ycursor)) =
454 right_biased_binary_search(&aligned_ops, &xaddr, |lhs, rhs| lhs.cmp(&rhs.yaddr));
455 let ycursor = ycursor as usize;
456 let middle = (xcursor + ycursor) / 2;
457 let start = (middle - before / 2).min(xcursor).min(ycursor);
461 let end = (middle + after / 2).max(xcursor).max(ycursor);
462 let ops = aligned_ops[start..end].to_vec();
463 if !sender(AlignedMessage::Initial(ops, [xaddr, yaddr])) {
464 return;
465 }
466 let algo = self.clone();
467 let files_cp = files.clone();
468 let sender_cp = sender.clone();
469 let start_addr = [aligned_ops[start].xaddr, aligned_ops[start].yaddr];
470 std::thread::scope(|s| {
471 s.spawn(move || {
472 let files_cp = as_byte_arrays(&files_cp);
473 algo.align_front(files_cp, start_addr, sender_cp)
474 });
475 let end_addr = aligned_ops
476 .get(end)
477 .map(|x| [x.xaddr, x.yaddr])
478 .unwrap_or([xaddr_end, yaddr_end]);
479 self.align_end(as_byte_arrays(&files), end_addr, sender);
480 })
481 }
482
483 pub fn align_end(
485 &self,
486 files: [&[u8]; 2],
487 start_addresses: [usize; 2],
488 mut sender: impl FnMut(AlignedMessage) -> bool,
489 ) {
490 let block_size = self.block_size(files);
491 let [mut xaddr, mut yaddr] = start_addresses;
492 let [x, y] = files;
493 while xaddr < x.len() && yaddr < y.len() {
496 let end_aligned = self.align(
498 [
499 &x[xaddr..(xaddr + block_size).min(x.len())],
500 &y[yaddr..(yaddr + block_size).min(y.len())],
501 ],
502 self.mode.into(),
503 );
504 let ops = &end_aligned[0..end_aligned.len().min(block_size / 2)];
507 if ops.is_empty() {
509 break;
510 }
511 let (end, new_xaddr, new_yaddr) = AlignElement::from_array(ops, &x, &y, xaddr, yaddr);
512 if !sender(AlignedMessage::Append(end)) {
513 return;
514 }
515 xaddr = new_xaddr;
516 yaddr = new_yaddr;
517 }
518 let clip = if x.len() == xaddr {
519 Op::Yclip(y.len() - yaddr)
520 } else if y.len() == yaddr {
521 Op::Xclip(x.len() - xaddr)
522 } else {
523 return;
524 };
525 let leftover = AlignElement::from_array(&[clip], &x, &y, xaddr, yaddr).0;
526 let _ = sender(AlignedMessage::Append(leftover));
527 }
528 pub fn align_front(
530 &self,
531 files: [&[u8]; 2],
532 end_addresses: [usize; 2],
533 mut sender: impl FnMut(AlignedMessage) -> bool,
534 ) {
535 let block_size = self.block_size(files);
536 let [mut xaddr, mut yaddr] = end_addresses;
537 let [x, y] = files;
538 while xaddr > 0 && yaddr > 0 {
539 let lower_xaddr = xaddr.saturating_sub(block_size);
540 let lower_yaddr = yaddr.saturating_sub(block_size);
541 let aligned = self.align(
542 [&x[lower_xaddr..xaddr], &y[lower_yaddr..yaddr]],
543 self.mode.into(),
544 );
545 let (end, _, _) = AlignElement::from_array(&aligned, &x, &y, lower_xaddr, lower_yaddr);
549 let real_end = Vec::from(&end[end.len().saturating_sub(block_size / 2)..end.len()]);
550 if real_end.is_empty() {
552 break;
553 }
554 let first = real_end.first().unwrap();
555 xaddr = first.xaddr;
556 yaddr = first.yaddr;
557 if !sender(AlignedMessage::Prepend(real_end)) {
558 return;
559 }
560 }
561 let clip = if xaddr == 0 {
562 Op::Yclip(yaddr)
563 } else if yaddr == 0 {
564 Op::Xclip(xaddr)
565 } else {
566 return;
567 };
568 let leftover = AlignElement::from_array(&[clip], &x, &y, 0, 0).0;
569 let _ = sender(AlignedMessage::Prepend(leftover));
570 }
571}
572
573#[derive(Clone, Copy, Debug)]
576pub struct AlignElement {
577 pub xaddr: usize,
579 pub xbyte: Option<u8>,
581 pub yaddr: usize,
583 pub ybyte: Option<u8>,
585}
586
587#[derive(Clone, Debug)]
588pub enum AlignedMessage {
589 Append(Vec<AlignElement>),
591 Prepend(Vec<AlignElement>),
593 Initial(Vec<AlignElement>, [usize; 2]),
596}
597
598impl AlignElement {
599 pub fn mirror(&self) -> AlignElement {
601 AlignElement {
602 xaddr: self.yaddr,
603 xbyte: self.ybyte,
604 yaddr: self.xaddr,
605 ybyte: self.xbyte,
606 }
607 }
608
609 fn from_array(
612 r: &[Op],
613 x: &[u8],
614 y: &[u8],
615 mut xaddr: usize,
616 mut yaddr: usize,
617 ) -> (Vec<AlignElement>, usize, usize) {
618 let mut v = Vec::new();
619 for op in r {
620 match op {
621 Op::Match | Op::Subst => {
622 v.push(AlignElement {
623 xaddr,
624 xbyte: Some(x[xaddr]),
625 yaddr,
626 ybyte: Some(y[yaddr]),
627 });
628 xaddr += 1;
629 yaddr += 1;
630 }
631 Op::Ins => {
632 v.push(AlignElement {
633 xaddr,
634 xbyte: Some(x[xaddr]),
635 yaddr,
636 ybyte: None,
637 });
638 xaddr += 1;
639 }
640 Op::Del => {
641 v.push(AlignElement {
642 xaddr,
643 xbyte: None,
644 yaddr,
645 ybyte: Some(y[yaddr]),
646 });
647 yaddr += 1;
648 }
649 Op::Xclip(size) => {
650 v.extend((xaddr..xaddr + size).map(|s| AlignElement {
651 xaddr: s,
652 xbyte: Some(x[s]),
653 yaddr,
654 ybyte: None,
655 }));
656 xaddr += size
657 }
658 Op::Yclip(size) => {
659 v.extend((yaddr..yaddr + size).map(|s| AlignElement {
660 xaddr,
661 xbyte: None,
662 yaddr: s,
663 ybyte: Some(y[s]),
664 }));
665 yaddr += size
666 }
667 }
668 }
669 (v, xaddr, yaddr)
670 }
671}
672
673fn ops_pattern_subrange(mut ops: &[Op]) -> (&[Op], usize) {
675 let mut ret_addr = 0;
676 if let [Op::Yclip(addr), rest @ ..] = ops {
677 ops = rest;
678 ret_addr += addr;
679 }
680 while let [Op::Del, rest @ ..] = ops {
681 ops = rest;
682 ret_addr += 1;
683 }
684 while let [rest @ .., Op::Del | Op::Yclip(_)] = ops {
685 ops = rest;
686 }
687 (ops, ret_addr)
688}