1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*!

Build a deterministic finite automaton (DFA) that computes
the levenshtein distance from a given string.


# Example

```rust
# extern crate levenshtein_automata;
use levenshtein_automata::{LevenshteinAutomatonBuilder, Distance};
# fn main() {

// Building this factory is not free.
let lev_automaton_builder = LevenshteinAutomatonBuilder::new(2, true);

// We can now build an entire dfa.
let dfa = lev_automaton_builder.build_dfa("Levenshtein");

let mut state = dfa.initial_state();
    for &b in "Levenshtain".as_bytes() {
    state = dfa.transition(state, b);
}

assert_eq!(dfa.distance(state), Distance::Exact(1));
# }
```

The implementation is based on the following paper
**Fast String Correction with Levenshtein-Automata (2002)** by by Klaus Schulz and Stoyan Mihov.
I also tried to explain it in the following [blog post](https://fulmicoton.com/posts/levenshtein/).

!*/

#![cfg_attr(test, feature(test))]

#[cfg(feature = "fst_automaton")]
extern crate fst;

#[cfg(test)]
extern crate test;
#[cfg(test)]
mod bench;
#[cfg(test)]
mod tests;

mod alphabet;
mod dfa;
mod index;
mod levenshtein_nfa;
mod parametric_dfa;

pub use self::dfa::{DFA, SINK_STATE};
use self::index::Index;
pub use self::levenshtein_nfa::Distance;
use self::levenshtein_nfa::LevenshteinNFA;
use self::parametric_dfa::ParametricDFA;

/// Builder for Levenshtein Automata.
///
/// It wraps a precomputed datastructure that allows to
/// produce small (but not minimal) DFA.
pub struct LevenshteinAutomatonBuilder {
    parametric_dfa: ParametricDFA,
}

impl LevenshteinAutomatonBuilder {
    /// Creates a Levenshtein automaton builder.
    /// The builder
    ///
    /// * `max_distance` - maximum distance considered by the automaton.
    /// * `transposition_cost_one` - assign a distance of 1 for transposition
    ///
    /// Building this automaton builder is computationally intensive.
    /// While it takes only a few milliseconds for `d=2`, it grows exponentially with
    /// `d`. It is only reasonable to `d <= 5`.
    pub fn new(max_distance: u8, transposition_cost_one: bool) -> LevenshteinAutomatonBuilder {
        let levenshtein_nfa = LevenshteinNFA::levenshtein(max_distance, transposition_cost_one);
        let parametric_dfa = ParametricDFA::from_nfa(&levenshtein_nfa);
        LevenshteinAutomatonBuilder {
            parametric_dfa: parametric_dfa,
        }
    }

    /// Builds a Finite Determinstic Automaton to compute
    /// the levenshtein distance to a fixed given `query`.
    ///
    /// There is no guarantee that the resulting DFA is minimal
    /// but its number of states is guaranteed to be smaller
    /// than `C * (query.len() + 1)` in which C is a constant that depends
    /// on the distance as well as whether transposition are supported
    /// or not.
    ///
    /// For instance for `d=2` and with transposition, `C=68`.
    pub fn build_dfa(&self, query: &str) -> DFA {
        self.parametric_dfa.build_dfa(query, false)
    }

    /// Builds a Finite Determinstic Automaton that computes
    /// the prefix levenshtein distance to a given `query`.
    ///
    /// Given a test string, the resulting distance is defined as
    ///
    /// ```formula
    ///     min( levenshtein(&test_string[..i], query } for i in 0..test_string.len() )
    /// ```
    ///
    /// Which translates as *the minimum distance of the prefixes of `test_strings`*.
    ///
    /// See also [.build_dfa(...)](./struct.LevenshteinAutomatonBuilder.html#method.build_dfa).
    pub fn build_prefix_dfa(&self, query: &str) -> DFA {
        self.parametric_dfa.build_dfa(query, true)
    }
}