1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
mod sais;
#[cfg(test)]
mod tests;
mod utils;
use sais::*;
use std::ops::Range;
use utils::*;
#[derive(Clone)]
pub struct SuffixArray<'s> {
s: &'s [u8],
sa: Vec<u32>,
}
impl<'s> SuffixArray<'s> {
pub fn new(s: &'s [u8]) -> Self {
let mut sa = vec![0; s.len() + 1];
construct(s, &mut sa[..]);
SuffixArray { s, sa }
}
pub fn set(&mut self, s: &'s [u8]) {
self.sa.resize(s.len() + 1, 0);
construct(s, &mut self.sa[..]);
}
pub fn fit(&mut self) {
self.sa.shrink_to_fit()
}
pub fn len(&self) -> usize {
self.s.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn contains(&self, sub: &[u8]) -> bool {
self.sa
.binary_search_by_key(&sub, |&i| truncate(&self.s[i as usize..], sub.len()))
.is_ok()
}
pub fn search_all(&self, sub: &[u8]) -> &[u32] {
let mut i = 0;
let mut k = self.sa.len();
while i < k {
let m = i + (k - i) / 2;
if sub > &self.s[self.sa[m] as usize..] {
i = m + 1;
} else {
k = m;
}
}
let mut j = i;
k = self.sa.len();
while j < k {
let m = j + (k - j) / 2;
if self.s[self.sa[m] as usize..].starts_with(sub) {
j = m + 1;
} else {
k = m;
}
}
&self.sa[i..j]
}
pub fn search_prefix(&self, pat: &[u8]) -> Range<usize> {
let point = self.sa.binary_search_by(|&i| self.s[i as usize..].cmp(pat));
match point {
Ok(i) => {
let j = self.sa[i] as usize;
j..self.s.len()
}
Err(i) => {
if i > 0 && i < self.sa.len() {
let j = self.sa[i - 1] as usize;
let k = self.sa[i] as usize;
let a = common_prefix(pat, &self.s[j..]);
let b = common_prefix(pat, &self.s[k..]);
if a > b {
j..j + a
} else {
k..k + b
}
} else if i == self.sa.len() {
let j = self.sa[i - 1] as usize;
let a = common_prefix(pat, &self.s[j..]);
j..j + a
} else {
self.s.len()..self.s.len()
}
}
}
}
}