1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// Copyright 2014-2016 Johannes Köster.
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
// This file may not be copied, modified, or distributed
// except according to those terms.

//! `ShiftAnd` algorithm for pattern matching.
//! Patterns may contain at most 64 symbols.
//! Complexity: O(n) with text length n.
//!
//! # Example
//!
//! ```rust
//! use bio::pattern_matching::shift_and;
//! let pattern = b"AAAA";
//! let text = b"ACGGCTAGAAAAGGCTAG";
//! let shiftand = shift_and::ShiftAnd::new(pattern);
//! let occ = shiftand.find_all(text).next().unwrap();
//! assert_eq!(occ, 8);
//! ```

use std::iter::Enumerate;

use utils::{IntoTextIterator, TextIterator};

/// `ShiftAnd` algorithm.
///
/// Holds the state precomputed for one pattern. Build it with `ShiftAnd::new`
/// and search a text with `ShiftAnd::find_all`.
pub struct ShiftAnd {
    /// Number of symbols in the pattern.
    m: usize,
    /// One bitmask per possible byte value, as computed by `masks`: a bit is set
    /// for every pattern position that holds that byte.
    masks: [u64; 256],
    /// Bitmask, as computed by `masks`, that is tested against the search state
    /// to detect that a complete occurrence of the pattern has just ended.
    accept: u64,
}


impl ShiftAnd {
    /// Build a `ShiftAnd` matcher for the given pattern.
    ///
    /// The pattern length is recorded and the per-byte bitmasks and the accept
    /// mask are precomputed via `masks`.
    ///
    /// # Panics
    ///
    /// Panics if the pattern consists of more than 64 symbols.
    pub fn new<'a, P: IntoTextIterator<'a>>(pattern: P) -> Self
    where
        P::IntoIter: ExactSizeIterator,
    {
        let symbols = pattern.into_iter();
        let len = symbols.len();
        assert!(len <= 64, "Expecting a pattern of at most 64 symbols.");

        let (table, accept_mask) = masks(symbols);
        ShiftAnd {
            m: len,
            masks: table,
            accept: accept_mask,
        }
    }

    /// Search `text` for occurrences of the pattern.
    ///
    /// Returns an iterator that yields the start position of each match. The
    /// search is lazy: the text is only consumed as the iterator is advanced.
    pub fn find_all<'a, I: IntoTextIterator<'a>>(&'a self, text: I) -> Matches<I::IntoIter> {
        // Pair every text symbol with its index so match positions can be reported.
        let indexed_text = text.into_iter().enumerate();
        Matches {
            shiftand: self,
            active: 0,
            text: indexed_text,
        }
    }
}


/// Calculate ShiftAnd masks. This function is called automatically when instantiating
/// a new ShiftAnd for a given pattern.
///
/// Returns `(masks, accept)`:
///
/// * bit `i` of `masks[c]` is set iff the `i`-th pattern symbol is the byte `c`;
/// * `accept` has exactly the bit of the last pattern symbol set, or is `0` for
///   an empty pattern.
///
/// # Panics
///
/// Panics if the pattern has more than 64 symbols, since each symbol needs its
/// own bit in a `u64`.
pub fn masks<'a, I: IntoTextIterator<'a>>(pattern: I) -> ([u64; 256], u64) {
    let mut table = [0u64; 256];
    let mut accept = 0u64;

    for (i, &c) in pattern.into_iter().enumerate() {
        assert!(i < 64, "Expecting a pattern of at most 64 symbols.");
        // Derive the bit from the position instead of doubling a running value:
        // doubling after the 64th symbol would overflow the `u64`.
        let bit = 1u64 << i;
        table[c as usize] |= bit;
        // The bit of the most recent symbol is the accept bit once the loop ends.
        accept = bit;
    }

    (table, accept)
}


/// Iterator over start positions of matches.
///
/// Created by `ShiftAnd::find_all`.
pub struct Matches<'a, I: TextIterator<'a>> {
    /// Matcher whose masks, accept mask and pattern length drive the search.
    shiftand: &'a ShiftAnd,
    /// Current search state; starts at 0 and is updated once per consumed text symbol.
    active: u64,
    /// Text still to be searched, each symbol paired with its index.
    text: Enumerate<I>,
}


impl<'a, I: Iterator<Item = &'a u8>> Iterator for Matches<'a, I> {
    type Item = usize;

    /// Consume text until the next occurrence of the pattern ends.
    ///
    /// Returns the start position of that occurrence, or `None` once the text
    /// is exhausted. The search state is kept between calls, so overlapping
    /// occurrences are reported as well.
    fn next(&mut self) -> Option<usize> {
        let shiftand = self.shiftand;
        for (i, &c) in self.text.by_ref() {
            // Extend every partial match by one symbol, start a fresh one at
            // bit 0, and keep only those that are compatible with `c`.
            self.active = ((self.active << 1) | 1) & shiftand.masks[c as usize];
            if self.active & shiftand.accept != 0 {
                // `i` is the index of the last matched symbol. Add before
                // subtracting: for a match at the very start of the text
                // `i == m - 1`, so `i - m` would underflow `usize`.
                return Some(i + 1 - shiftand.m);
            }
        }

        None
    }
}