1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
use crate::lattice::Lattice;

use std::collections::HashSet;
use std::collections::VecDeque;

/// Knuth-Morris-Pratt style matcher that searches for a word sequence along
/// the paths of a lattice.
pub struct LatticeKMP<'a> {
    /// The word sequence to search for.
    pattern: Vec<&'a str>,
    /// Failure table built in `new`: entry `i` is the length of the longest
    /// proper prefix of `pattern[..=i]` that is also a suffix of it.
    cpattern: Vec<usize>,
}

impl<'a> LatticeKMP<'a> {
    /// Returns LatticeKMP with the given pattern.
    ///
    /// The constructor precomputes the KMP failure table of `pattern` so that
    /// `search` can fall back to the longest still-matching prefix after a
    /// mismatch instead of restarting from scratch.
    ///
    /// # Arguments
    ///
    /// * `pattern` - A word array
    ///
    /// # Example
    ///
    /// ```
    /// use parattice::LatticeKMP;
    ///
    /// let pattern = vec!["幹", "細胞"];
    /// let kmp = LatticeKMP::new(pattern);
    /// ```
    pub fn new(pattern: Vec<&'a str>) -> Self {
        // cpattern[i] is the length of the longest proper prefix of
        // pattern[..=i] that is also a suffix of it. The table is seeded with
        // the entry for the first word, which is always 0.
        let mut cpattern = vec![0];
        for i in 1..pattern.len() {
            let mut j = cpattern[i - 1];
            // Shrink the candidate border until it can be extended by pattern[i].
            while j > 0 && pattern[j] != pattern[i] {
                j = cpattern[j - 1];
            }
            if pattern[j] == pattern[i] {
                j += 1;
            }
            cpattern.push(j);
        }
        LatticeKMP { pattern, cpattern }
    }

    /// Returns paths of found patterns.
    ///
    /// The lattice is explored breadth-first starting from node `0`. Every
    /// returned path begins with a marker `("", start)` where `start` is the
    /// index of the node at which the match begins, followed by one
    /// `(word, node)` entry per matched edge, `node` being the index of the
    /// node the edge leads to.
    ///
    /// An empty pattern never matches: the returned vector is empty.
    ///
    /// # Arguments
    ///
    /// * `lattice` - A lattice
    ///
    /// # Example
    ///
    /// ```
    /// use parattice::PaRattice;
    /// use parattice::Lattice;
    /// use parattice::LatticeKMP;
    ///
    /// let pattern = vec!["幹", "細胞"];
    /// let kmp = LatticeKMP::new(pattern);
    ///
    /// let paradict = vec![
    ///     vec![
    ///         vec!["blood", "stem", "cell"],
    ///         vec!["造血", "幹", "細胞"],
    ///         vec!["hematopoietic", "stem", "cell"],
    ///     ],
    ///     vec![
    ///         vec!["造血", "幹", "細胞", "移植"],
    ///         vec!["hematopoietic", "stem", "cell", "transplantation"],
    ///     ],
    ///     vec![vec!["stem", "cell"], vec!["幹", "細胞"]],
    ///     vec![
    ///         vec!["幹", "細胞", "移植"],
    ///         vec!["rescue", "transplant"],
    ///         vec!["stem", "cell", "rescue"],
    ///     ],
    ///     vec![vec!["rescue"], vec!["救命"]],
    ///     vec![vec!["blood"], vec!["血液"]],
    /// ];
    /// let parattice = PaRattice::new(paradict);
    /// let words = vec!["造血", "幹", "細胞", "移植"];
    /// let lattice = parattice.get_lattice(&words, true, 2);
    ///
    /// let results = kmp.search(&lattice);
    /// ```
    pub fn search(&self, lattice: &'a Lattice) -> Vec<Vec<(&'a str, usize)>> {
        let mut results = vec![];
        // Without this guard the first visited edge would index `pattern[0]`
        // and panic.
        if self.pattern.is_empty() {
            return results;
        }

        // Candidates that were already enqueued; prevents exploring the same
        // partial match twice when several lattice paths converge.
        let mut added_candidates = HashSet::new();

        // Each queue entry is (node index, number of matched pattern words,
        // partial match). A partial match is the matched edges preceded by
        // one extra entry whose `.1` is the node where the match starts.
        let mut initial = VecDeque::new();
        initial.push_back(("", 0usize));
        let mut queue = VecDeque::new();
        queue.push_back((0usize, 0usize, initial));

        while let Some((node, matched, candidate)) = queue.pop_front() {
            // A node without a main forward edge is not expanded.
            if lattice.lattice[node].forward_main.is_none() {
                continue;
            }
            for edge in &lattice.lattice[node].forwards {
                // Standard KMP step: fall back along the failure table until
                // the edge word extends the current prefix (or nothing is left).
                let mut j = matched;
                while j > 0 && edge.0 != self.pattern[j] {
                    j = self.cpattern[j - 1];
                }
                if edge.0 == self.pattern[j] {
                    j += 1;
                }

                // Rebuild the partial match: the new edge plus the last
                // `j - 1` edges of the previous candidate.
                let mut new_candidate = VecDeque::new();
                new_candidate.push_back(*edge);
                let mut k = candidate.len();
                while new_candidate.len() < j {
                    k -= 1;
                    new_candidate.push_front(candidate[k]);
                }
                // The entry just before the kept edges ends at the node where
                // the (possibly shortened) match starts.
                new_candidate.push_front(("", candidate[k - 1].1));

                if j == self.pattern.len() {
                    results.push(new_candidate.iter().cloned().collect());
                    // Continue from the longest border so overlapping
                    // occurrences are still found.
                    j = self.cpattern[j - 1];
                    while new_candidate.len() > j + 1 {
                        new_candidate.pop_front();
                    }
                }

                // `insert` returns true only for candidates not seen before.
                if added_candidates.insert(new_candidate.clone()) {
                    queue.push_back((edge.1, j, new_candidate));
                }
            }
        }
        results
    }
}