1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
use std::borrow::Borrow;
use std::collections::HashMap;

use super::long::Myers as MyersLong;
use super::{BitVec, Myers};

/// Builder for Myers instances with optional support for ambiguous pattern symbols and
/// text wildcards.
///
/// # Example:
///
/// This example shows how recognition of IUPAC ambiguities in patterns can be implemented:
///
/// ```
/// # extern crate bio;
/// use bio::pattern_matching::myers::MyersBuilder;
///
/// # fn main() {
/// let ambigs = [
///     (b'M', &b"AC"[..]),
///     (b'R', &b"AG"[..]),
///     (b'W', &b"AT"[..]),
///     (b'S', &b"CG"[..]),
///     (b'Y', &b"CT"[..]),
///     (b'K', &b"GT"[..]),
///     (b'V', &b"ACGMRS"[..]),
///     (b'H', &b"ACTMWY"[..]),
///     (b'D', &b"AGTRWK"[..]),
///     (b'B', &b"CGTSYK"[..]),
///     (b'N', &b"ACGTMRWSYKVHDB"[..])
/// ];
///
/// let mut builder = MyersBuilder::new();
///
/// for &(base, equivalents) in &ambigs {
///     builder.ambig(base, equivalents);
/// }
///
/// let text = b"GGATGNGCGCCATAG";
/// let pattern = b"TRANCGG";
/// //                *   * (mismatch)
///
/// let myers = builder.build_64(pattern);
/// assert_eq!(myers.distance(text), 2);
/// # }
/// ```
///
/// Note that only ambiguities in the pattern are recognized. The reverse does not hold:
/// an ambiguous symbol in the search text is not matched by the concrete symbols it stands
/// for in the pattern. Supporting that direction requires registering additional
/// ambiguities (`builder.ambig(b'A', b"MRWVHDN")`, etc.).
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct MyersBuilder {
    /// Registered ambiguity codes: pattern symbol -> every symbol it is treated as
    /// equivalent to.
    ambigs: HashMap<u8, Vec<u8>>,
    /// Registered text wildcard symbols, in registration order.
    wildcards: Vec<u8>,
}

impl MyersBuilder {
    pub fn new() -> MyersBuilder {
        Self::default()
    }

    /// Allows to specify ambiguous symbols and their equivalents. Note that the ambiguous symbol
    /// will always be matched by itself. Explicitly including it in the equivalents is not
    /// necessary.
    ///
    /// # Example:
    ///
    /// ```
    /// # extern crate bio;
    /// use bio::pattern_matching::myers::MyersBuilder;
    ///
    /// # fn main() {
    /// let text = b"GGATGAGCGCCATAG";
    /// let pattern = b"TGAGCGN";
    ///
    /// let myers = MyersBuilder::new()
    ///     .ambig(b'N', b"ACGT")
    ///     .build_64(pattern);
    ///
    /// assert_eq!(myers.distance(text), 0);
    /// # }
    pub fn ambig<I, B>(&mut self, byte: u8, equivalents: I) -> &mut Self
    where
        I: IntoIterator<Item = B>,
        B: Borrow<u8>,
    {
        let eq = equivalents
            .into_iter()
            .map(|b| *b.borrow())
            .chain(Some(byte))
            .collect();
        self.ambigs.insert(byte, eq);
        self
    }

    /// Allows to specify a wildcard symbol, that upon appearance in the search text
    /// shall be matched by any symbol of the pattern. Multiple wildcards are possible.
    /// For the inverse, that is, wildcards in the pattern matching any symbol in search
    /// text, use `ambig(byte, 0..255)`.
    ///
    /// # Example:
    ///
    /// ```
    /// # extern crate bio;
    /// use bio::pattern_matching::myers::MyersBuilder;
    ///
    /// # fn main() {
    /// let text = b"GGATGAGCG*CATAG";
    /// let pattern = b"TGAGCGT";
    ///
    /// let myers = MyersBuilder::new()
    ///     .text_wildcard(b'*')
    ///     .build_64(pattern);
    ///
    /// assert_eq!(myers.distance(text), 0);
    /// # }
    pub fn text_wildcard(&mut self, wildcard: u8) -> &mut Self {
        self.wildcards.push(wildcard);
        self
    }

    /// Creates a Myers instance given a pattern, using `u64` as bit vector type.
    /// Pattern length is restricted to at most 64 symbols.
    pub fn build_64<C, P>(&self, pattern: P) -> Myers<u64>
    where
        C: Borrow<u8>,
        P: IntoIterator<Item = C>,
        P::IntoIter: ExactSizeIterator,
    {
        self.build(pattern)
    }

    /// Creates a Myers instance given a pattern, using `u128` as bit vector type.
    /// Pattern length is restricted to at most 128 symbols.
    #[cfg(has_u128)]
    pub fn build_128<C, P>(&self, pattern: P) -> Myers<u128>
    where
        C: Borrow<u8>,
        P: IntoIterator<Item = C>,
        P::IntoIter: ExactSizeIterator,
    {
        self.build(pattern)
    }

    /// Creates a Myers instance given a pattern, using any desired type for bit vectors.
    /// Pattern length is restricted to the size of the bit vector `T`.
    ///
    /// # Example:
    ///
    /// ```
    /// # extern crate bio;
    /// use bio::pattern_matching::myers::{MyersBuilder, Myers};
    ///
    /// # fn main() {
    /// let myers: Myers<u32> = MyersBuilder::new()
    ///     .text_wildcard(b'*')
    ///     .build(b"TGAGCG*");
    /// // ...
    /// # }
    pub fn build<T, C, P>(&self, pattern: P) -> Myers<T>
    where
        T: BitVec,
        C: Borrow<u8>,
        P: IntoIterator<Item = C>,
        P::IntoIter: ExactSizeIterator,
    {
        Myers::new_ambig(pattern, Some(&self.ambigs), Some(&self.wildcards))
    }

    /// Creates a `long::Myers` instance given a pattern, using `u64` as bit vector type.
    /// Pattern length is not restricted regardless of the type of the bit vector.
    pub fn build_long_64<C, P>(&self, pattern: P) -> MyersLong<u64>
    where
        C: Borrow<u8>,
        P: IntoIterator<Item = C>,
        P::IntoIter: ExactSizeIterator,
    {
        self.build_long(pattern)
    }

    /// Creates a `long::Myers` instance given a pattern, using `u128` as bit vector type.
    /// Pattern length is not restricted regardless of the type of the bit vector.
    #[cfg(has_u128)]
    pub fn build_long_128<C, P>(&self, pattern: P) -> MyersLong<u128>
    where
        C: Borrow<u8>,
        P: IntoIterator<Item = C>,
        P::IntoIter: ExactSizeIterator,
    {
        self.build_long(pattern)
    }

    /// Creates a `long::Myers` instance given a pattern, using any desired type for bit vectors.
    /// Pattern length is not restricted regardless of the type of the bit vector.
    pub fn build_long<T, C, P>(&self, pattern: P) -> MyersLong<T>
    where
        T: BitVec,
        C: Borrow<u8>,
        P: IntoIterator<Item = C>,
        P::IntoIter: ExactSizeIterator,
    {
        MyersLong::new_ambig(pattern, Some(&self.ambigs), Some(&self.wildcards))
    }
}