1use std::cmp::{min, max};
2use crate::segment_tree::{RmqMin, SegmentTreeMin, SegmentTreeMax};
3use std::collections::{BTreeMap, VecDeque};
4
5pub fn kmp(t: &str, p: &str) -> Vec<usize> {
14 let mut res = vec![];
15 let pr = prefix_function(p);
16 let mut idx = 0;
17 let pattern = p.as_bytes();
18 for (i, value) in t.as_bytes().iter().enumerate() {
19 while idx > 0 && pattern[idx] != *value{
20 idx = pr[idx - 1];
21 }
22 if pattern[idx] == *value {
23 idx += 1;
24 }
25 if idx == p.len() {
26 res.push(i + 1 - idx);
27 idx = pr[idx - 1];
28 }
29 }
30 res
31}
32
33#[test]
34fn test_kmp(){
35 assert_eq!(kmp("ababcxabdabcxabcxabcde", "abcxabcde"), vec![13]);
36 assert_eq!(kmp("a", "ab"), vec![]);
37 assert_eq!(kmp("aaaaa", "a"), vec![0, 1, 2, 3, 4]);
38 assert_eq!(kmp("abcdabcd", "abc"), vec![0, 4]);
39}
40
41pub fn kmp_first(t: &str, p: &str) -> Option<usize> {
51 let pr = prefix_function(p);
52 let mut idx = 0;
53 let pattern = p.as_bytes();
54 for (i, value) in t.as_bytes().iter().enumerate() {
55 while idx > 0 && pattern[idx] != *value{
56 idx = pr[idx - 1];
57 }
58 if pattern[idx] == *value {
59 idx += 1;
60 }
61 if idx == p.len() {
62 return Some(i + 1 - idx);
63 }
64 }
65 None
66}
67
68
69
70#[test]
71fn test_kmp_first(){
72 assert_eq!(kmp_first("ababcxabdabcxabcxabcde", "abcxabcde"), Some(13));
73 assert_eq!(kmp_first("a", "ab"), None);
74 assert_eq!(kmp_first("aaaaa", "a"), Some(0));
75 assert_eq!(kmp_first("ebcdabcd", "abc"), Some(4));
76}
77
78pub fn levenshtein_distance(first: &str, second: &str, delete_cost: u32, insert_cost: u32, replace_cost: u32) -> u32 {
86 let first = first.as_bytes();
87 let second = second.as_bytes();
88 let mut dist_old = vec![0; first.len() + 1];
89 for j in 1..(first.len() + 1) {
90 dist_old[j] = dist_old[j - 1] + insert_cost;
91 }
92 for i in 1..second.len() + 1 {
93 let mut dist = vec![0; first.len() + 1];
94 dist[0] = dist_old[0] + delete_cost;
95 for j in 1..(first.len() + 1) {
96 if second[i - 1] != first[j - 1] {
97 dist[j] = min(min(dist_old[j] + delete_cost, dist_old[j - 1] + insert_cost), dist[j - 1] + replace_cost);
98 } else {
99 dist[j] = dist_old[j - 1];
100 }
101 }
102 dist_old = dist;
103 }
104 dist_old[first.len()]
105}
106
107#[test]
108fn test_levenshtein_distance(){
109 assert_eq!(levenshtein_distance("POLYNOMIAL", "EXPONENTIAL", 1, 1, 1), 6);
110 assert_eq!(levenshtein_distance("abcdasdasd", "cddabcd", 1, 1, 1), 6);
111 assert_eq!(levenshtein_distance("", "", 1, 1, 1), 0);
112 assert_eq!(levenshtein_distance("aaa", "aaa", 1, 1, 1), 0);
113 assert_eq!(levenshtein_distance("", "aaa", 1, 1, 1), 3);
114}
115
116pub fn minimum_string_period(src: &str) ->&str {
125 for (idx, value) in z_function(src).iter().enumerate() {
126 if value + idx == src.len() && src.len() % idx == 0 {
127 return &src[..idx];
128 } else if value + idx == src.len() {
129 let k = src.len() % idx;
130 if src[..k] == src[src.len() - k..] {
131 return &src[..idx];
132 }
133 }
134 }
135 src
136}
137
138#[test]
139fn test_minimum_string_period() {
140 assert_eq!(minimum_string_period("abcabcabca"), "abc");
141 assert_eq!(minimum_string_period("abcdefg"), "abcdefg");
142 assert_eq!(minimum_string_period("abcabcabcd"), "abcabcabcd");
143 assert_eq!(minimum_string_period(""), "");
144}
145
146pub fn distinct_substrings(s: &str)->Vec<&str> {
157 let mut seq = vec![];
158 for i in (0..s.len()).rev() {
159 let pr = z_function(&s[i ..s.len()]);
160 let res = s.len() - i - pr.iter().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
161 for j in 0..res {
162 seq.push(&s[i..s.len() - j]);
163 }
164 }
165 seq
166}
167
168#[test]
169fn test_distinct_substrings(){
170 assert_eq!(distinct_substrings("a"), vec!["a"]);
171 assert_eq!(distinct_substrings("aaaa"), vec!["a", "aa", "aaa", "aaaa"]);
172 assert_eq!(distinct_substrings(""), Vec::<&str>::new());
173 let mut values = distinct_substrings("abaaba");
174 values.sort();
175 assert_eq!(values, vec!["a", "aa", "aab", "aaba", "ab", "aba", "abaa", "abaab", "abaaba", "b", "ba", "baa", "baab", "baaba"]);
176 assert_eq!(distinct_substrings("abacabadabacaba").len(), 85);
177}
178
179fn prefix_function(src: &str) -> Vec<usize> {
180 let mut pi = vec![0; src.len()];
181 let arr = src.as_bytes();
182 for i in 1 .. arr.len() {
183 let mut j = pi[i - 1];
184 while j > 0 && arr[i] != arr[j] {
185 j = pi[j - 1];
186 }
187 if arr[i] == arr[j] {
188 j += 1;
189 }
190 pi[i] = j;
191 }
192 pi
193}
194
195#[test]
196fn test_prefix_function() {
197 assert_eq!(prefix_function("abacaba"), [0, 0, 1, 0, 1, 2, 3]);
198 assert_eq!(prefix_function("b"), [0]);
199 assert_eq!(prefix_function("aaaaa"), [0, 1, 2, 3, 4]);
200 assert_eq!(prefix_function(""), []);
201}
202
203pub fn z_function(src: &str) -> Vec<usize> {
204 let mut z = vec![0; src.len()];
205 let mut l = 0;
206 let mut r = 0;
207
208 let arr = src.as_bytes();
209 for i in 1..src.len() {
210 if i <= r {
211 z[i] = min(r - i + 1, z[i - l]);
212 }
213 while i + z[i] < arr.len() && arr[z[i]] == arr[i + z[i]]{
214 z[i] += 1;
215 }
216 if i + z[i] - 1 > r {
217 l = i;
218 r = i + z[i] - 1;
219 }
220 }
221 z
222}
223
224#[test]
225fn test_z_function_ascii() {
226 assert_eq!(z_function("abacaba"), [0, 0, 1, 0, 3, 0, 1]);
227}
228
229pub fn suffix_array(src: &str) -> (Vec<usize>, Vec<usize>) {
237
238 use std::cmp::Ordering;
239
240 #[derive(Eq, Copy, Clone, Default)]
241 struct Pair<T>
242 where T: std::cmp::Ord {
243 first: T,
244 second: usize,
245 }
246
247 impl<T> Ord for Pair<T>
248 where T: std::cmp::Ord {
249 fn cmp(&self, other: &Self) -> Ordering {
250 if self.first.eq(&other.first){
251 self.second.cmp(&other.second)
252 }else {
253 self.first.cmp(&other.first)
254 }
255 }
256 }
257
258 impl <T> PartialOrd for Pair<T>
259 where T: std::cmp::Ord {
260 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
261 Some(self.cmp(other))
262 }
263 }
264
265 impl <T> PartialEq for Pair<T>
266 where T: std::cmp::Ord {
267 fn eq(&self, other: &Self) -> bool {
268 self.first == other.first && self.second == other.second
269 }
270 }
271
272 fn radix_sort(mut arr: Vec<Pair<Pair<usize>>>) -> Vec<Pair<Pair<usize>>> {
273 let length = arr.len();
274
275 {
276 let mut cnt = vec![0; length];
277 for value in &arr {
278 cnt[value.first.second] += 1;
279 }
280 let mut arr_new: Vec<Pair<Pair<usize>>> = vec![Default::default(); length];
281 let mut pos = vec![0; length];
282 for i in 1..length {
283 pos[i] = pos[i -1] + cnt[i - 1];
284 }
285 for value in &arr {
286 let idx = value.first.second;
287 arr_new[pos[idx]] = *value;
288 pos[idx] += 1;
289 }
290 arr = arr_new;
291 }
292 {
293 let mut cnt = vec![0; length];
294 for value in &arr {
295 cnt[value.first.first] += 1;
296 }
297 let mut arr_new = vec![Default::default(); length];
298 let mut pos = vec![0; length];
299 for i in 1..length {
300 pos[i] = pos[i -1] + cnt[i - 1];
301 }
302 for value in arr {
303 let idx = value.first.first;
304 arr_new[pos[idx]] = value;
305 pos[idx] += 1;
306 }
307 arr = arr_new;
308 }
309 arr
310 }
311
312 let bytes = src.as_bytes();
313 let length = src.len();
314 let mut suffix_array = vec![0; length];
315 let mut classes = vec![0; length];
316 {
317 let mut a: Vec<Pair<u8>> = vec![Default::default(); length];
318 for (i, ch) in bytes.iter().enumerate() {
319 a[i] = Pair{first: *ch, second: i};
320 }
321 a.sort();
322 for i in 0..length {
323 suffix_array[i] = a[i].second;
324 }
325 classes[suffix_array[0]] = 0;
326 for i in 1..length {
327 if a[i].first == a[i - 1].first {
328 classes[suffix_array[i]] = classes[suffix_array[i - 1]];
329 }else {
330 classes[suffix_array[i]] = classes[suffix_array[i - 1]] + 1;
331 }
332 }
333 }
334 let mut k = 0;
335 while (1 << k) < length {
336 let mut a = vec![Default::default(); length];
337 for i in 0..length {
338 a[i] = Pair{first: Pair {first: classes[i], second: classes[(i + (1 << k)) % length]}, second: i};
339 }
340 a = radix_sort(a);
341 for i in 0..length {
342 suffix_array[i] = a[i].second;
343 }
344 classes[suffix_array[0]] = 0;
345 for i in 1..length {
346 if a[i].first == a[i - 1].first {
347 classes[suffix_array[i]] = classes[suffix_array[i - 1]];
348 }else {
349 classes[suffix_array[i]] = classes[suffix_array[i - 1]] + 1;
350 }
351 }
352 k += 1;
353 }
354 (suffix_array, classes)
355}
356
357#[test]
358fn test_suffix_array() {
359 assert_eq!(suffix_array("ababba$").0, vec![6, 5, 0, 2, 4, 1, 3]);
360 assert_eq!(suffix_array("bababa$").0, vec![6, 5, 3, 1, 4, 2, 0]);
361}
362
363pub struct Lcp<'a> {
373 data: RmqMin<usize>,
374 suffix_array: (&'a [usize], &'a [usize]),
375 pos_array: BTreeMap<usize, usize>
376}
377
378#[allow(clippy::many_single_char_names)]
379impl<'a> Lcp<'a> {
380 pub fn build(suffix_array: &'a[usize], classes: &'a[usize], text: &str) -> Self {
381 let mut lcp = vec![0; text.len()];
382 let bytes = text.as_bytes();
383 let mut k = 0;
384 for i in 0.. text.len() - 1 {
385 let pi = classes[i];
386 let j = suffix_array[pi - 1];
387 while bytes[i + k] == bytes[j + k] {
388 k += 1;
389 }
390 lcp[pi] = k;
391 if k > 0 {
392 k = max(k - 1, 0);
393 }
394 }
395 let mut pos = BTreeMap::new();
396 for (i, item) in suffix_array.iter().enumerate() {
397 pos.insert(*item, i);
398 }
399 Lcp {data: RmqMin::new(&lcp), suffix_array: (suffix_array, classes), pos_array: pos }
400 }
401
402 pub fn lcp(&self, i: usize, j: usize) -> Option<usize> {
403 impl SegmentTreeMin for usize {
404 fn minimal() -> usize {
405 usize::MIN
406 }
407 }
408
409 impl SegmentTreeMax for usize {
410 fn maximal() -> usize {
411 usize::MAX
412 }
413 }
414 if let Some(pos_i) = self.pos_array.get(&i) {
415 if let Some(pos_j) = self.pos_array.get(&j) {
416 if *pos_i == *pos_j {
417 return Some(self.suffix_array.0.len() - i - 1);
418 }
419 return self.data.query(min(*pos_i, *pos_j) + 1, max(*pos_i, *pos_j));
420 }
421 }
422 None
423 }
424}
425
426#[test]
427fn test_lcp() {
428 let (p, c) = suffix_array("ababba$");
429 let data = Lcp::build(&p, &c, "ababba$");
430 assert_eq!(data.lcp(0, 5), Some(1));
431 assert_eq!(data.lcp(0, 1), Some(0));
432 assert_eq!(data.lcp(1, 4), Some(2));
433 assert_eq!(data.lcp(3, 4), Some(1));
434 assert_eq!(data.lcp(4, 1), Some(2));
435 assert_eq!(data.lcp(1, 11), None);
436
437 assert_eq!(data.lcp(3, 3), Some(3));
438 assert_eq!(data.lcp(0, 0), Some(6));
439
440}
441
442pub fn hash(s: &str) -> u64 {
449 let mut k = 1;
450 let mut res = 0u64;
451 for ch in s.as_bytes() {
452 res = res.wrapping_add((*ch as u64).wrapping_mul(k));
453 k = k.wrapping_mul(31);
454 }
455 res
456}
457
458#[test]
459fn test_hash() {
460 hash("");
461 hash("a");
462 hash("abc");
463}
464
465#[allow(clippy::many_single_char_names)]
473pub fn common_substring<'a> (a: &'a str, b: &'a str) -> Option<&'a str> {
474 if a.is_empty() || b.is_empty() {
475 return None;
476 }
477 let mut p: Vec<u64> = vec![1; max(a.len(), b.len())];
478 let mut h1: Vec<u64> = vec![0; a.len()];
479 let mut h2: Vec<u64> = vec![0; b.len()];
480 for idx in 1..p.len() {
481 p[idx] = p[idx - 1].wrapping_mul(31);
482 }
483 for (idx, ch) in a.as_bytes().iter().enumerate() {
484 h1[idx] = (*ch as u64).wrapping_mul(p[idx]);
485 if idx != 0 {
486 h1[idx] = h1[idx].wrapping_add(h1[idx - 1]);
487 }
488 }
489 for (idx, ch) in b.as_bytes().iter().enumerate() {
490 h2[idx] = (*ch as u64).wrapping_mul(p[idx]);
491 if idx != 0 {
492 h2[idx] = h2[idx].wrapping_add(h2[idx - 1]);
493 }
494 }
495 let mut res = None;
496 let mut l = 0;
497 let mut r = min(a.len(), b.len()) - 1;
498 while l < r {
499 let mid = r - (r - l) / 2;
500 let mut map = BTreeMap::new();
501 for i in 0..a.len() - mid + 1 {
502 let mut hash = h1[i + mid - 1];
503 if i != 0 {
504 hash = hash.wrapping_sub(h1[i - 1]);
505 }
506 hash = hash.wrapping_mul(p[p.len() - i - 1]);
507 map.insert(hash, i);
508 }
509 let mut f = false;
510 for i in 0..b.len() - mid + 1 {
511 let mut hash = h2[i + mid - 1];
512 if i != 0 {
513 hash = hash.wrapping_sub(h2[i - 1]);
514 }
515 hash = hash.wrapping_mul(p[p.len() - i - 1]);
516 if let Some(idx) = map.get(&hash) {
517 if b[i..i + mid] == a[*idx..*idx + mid] {
518 res = Some(&b[i..i + mid]);
519 f = true;
520 break;
521 }
522 }
523 }
524 if f {
525 l = mid + 1;
526 } else {
527 r = mid - 1;
528 }
529 }
530
531 let mut map = BTreeMap::new();
532 for i in 0..a.len() - l + 1 {
533 let mut hash = h1[i + l - 1];
534 if i != 0 {
535 hash = hash.wrapping_sub(h1[i - 1]);
536 }
537 hash = hash.wrapping_mul(p[p.len() - i - 1]);
538 map.insert(hash, i);
539 }
540 for i in 0..b.len() - l + 1 {
541 let mut hash = h2[i + l - 1];
542 if i != 0 {
543 hash = hash.wrapping_sub(h2[i - 1]);
544 }
545 hash = hash.wrapping_mul(p[p.len() - i - 1]);
546 if let Some(idx) = map.get(&hash) {
547 if b[i..i + l] == a[*idx..*idx + l] {
548 res = Some(&b[i..i + l]);
549 break;
550 }
551 }
552 }
553 res
554}
555
556#[test]
557fn test_common_substring() {
558 assert_eq!(common_substring("VOTEFORTHEGREATALBANIAFORYOU", "CHOOSETHEGREATALBANIANFUTURE"), Some("THEGREATALBANIA"));
559 assert_eq!(common_substring("aba", "cabdd"), Some("ab"));
560 assert_eq!(common_substring("aaaaa", "bbaaa"), Some("aaa"));
561 assert_eq!(common_substring("", "bbaaa"), None);
562 assert_eq!(common_substring("abcde", "abcde"), Some("abcde"));
563 assert_eq!(common_substring("aaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaac"), Some("aaaaaaaaaaaaaaaaaaaaaaaaa"));
564}
565
566#[derive(Clone)]
567struct VertexAhoCorasick {
568 children: BTreeMap<i32, i32>,
569 link: i32,
570 good_link: i32,
571 pat_num: i32,
572 pch: i32,
573 parent: i32,
574}
575
576struct TrieAhoCorasick {
577 arr: Vec<VertexAhoCorasick>,
578 sz: i32,
579}
580
581impl TrieAhoCorasick {
582 fn new() -> TrieAhoCorasick {
583 TrieAhoCorasick { arr: vec![VertexAhoCorasick { children: BTreeMap::new(), link: -1, good_link: -1, pat_num: -1, pch: -1, parent: -1 }; 1], sz: 1 }
584 }
585 fn insert(&mut self, s: &str, num: i32) {
586 let mut v = 0;
587 for ch in s.as_bytes() {
588 let idx = *ch as i32;
589 if self.arr[v].children.get(&idx).is_none() {
590 self.arr.push(VertexAhoCorasick { children: BTreeMap::new(), link: 0, good_link: -1, pat_num: -1, pch: idx, parent: v as i32 });
591 self.arr[v].children.insert(idx, self.sz);
592 self.sz += 1;
593 }
594 v = *self.arr[v].children.get(&idx).unwrap() as usize;
595 }
596 self.arr[v].pat_num = num;
597 }
598}
599
600pub fn aho_corasick(dict: &[&str], t: &str) -> BTreeMap<i32, Vec<usize>> {
616 let mut res: BTreeMap<i32, Vec<usize>> = BTreeMap::new();
617 let mut trie = TrieAhoCorasick::new();
618 for (idx, s) in dict.iter().enumerate() {
619 trie.insert(*s, idx as i32);
620 }
621 let mut q = VecDeque::new();
622 q.push_back(0);
623 while !q.is_empty() {
624 let curr = q.pop_front().unwrap();
625
626 for value in trie.arr[curr as usize].children.values() {
627 q.push_back(*value);
628 }
629 if curr == 0 {
630 continue
631 }
632 let parent = trie.arr[curr as usize].parent;
633 let mut next_link = trie.arr[parent as usize].link;
634 let pch = trie.arr[curr as usize].pch;
635 while next_link >= 0 && trie.arr[next_link as usize].children.get(&pch).is_none() {
636 next_link = trie.arr[next_link as usize].link;
637 }
638 if next_link >= 0 {
639 let link = *trie.arr[next_link as usize].children.get(&pch).unwrap();
640 let good_link;
641 if trie.arr[link as usize].pat_num != -1 {
642 good_link = link;
643 } else {
644 good_link = trie.arr[link as usize].good_link;
645 }
646 let r = &mut trie.arr[curr as usize];
647 r.link = link;
648 r.good_link = good_link;
649 }
650 }
651 let mut v = 0i32;
652 for (i, ch) in t.as_bytes().iter().enumerate() {
653 let idx = *ch as i32;
654 while v >= 0 && trie.arr[v as usize].children.get(&idx).is_none() {
655 v = trie.arr[v as usize].link;
656 }
657 if v == -1 {
658 v = 0;
659 } else {
660 v = *trie.arr[v as usize].children.get(&idx).unwrap();
661 }
662 if trie.arr[v as usize].pat_num != -1 {
663 let value = res.entry(trie.arr[v as usize].pat_num).or_insert(vec![]);
664 (*value).push(i + 1 - dict[trie.arr[v as usize].pat_num as usize].len());
665 }
666 let mut good_link = trie.arr[v as usize].good_link;
667 while good_link > 0 {
668 let value = res.entry(trie.arr[good_link as usize].pat_num).or_insert(vec![]);
669 (*value).push(i + 1 - dict[trie.arr[good_link as usize].pat_num as usize].len());
670 good_link = trie.arr[good_link as usize].good_link;
671 }
672 }
673 res
674}
675
676#[test]
677fn test_aho_corasick() {
678 let mut dict = ["aba", "abb", "bbca"];
679 let t = "abaabbbbca";
680 let mut res = aho_corasick(&dict, t);
681
682 let mut m = BTreeMap::new();
683 m.insert(0, vec![0]);
684 m.insert(1, vec![3]);
685 m.insert(2, vec![6]);
686 assert_eq!(m, res);
687
688 let t = "abaabbbbcaaba";
689 res = aho_corasick(&dict, t);
690
691 m = BTreeMap::new();
692 m.insert(0, vec![0, 10]);
693 m.insert(1, vec![3]);
694 m.insert(2, vec![6]);
695 assert_eq!(m, res);
696
697 let t = "abaabbbbcaba";
698 res = aho_corasick(&dict, t);
699
700 m = BTreeMap::new();
701 m.insert(0, vec![0, 9]);
702 m.insert(1, vec![3]);
703 m.insert(2, vec![6]);
704 assert_eq!(m, res);
705
706 dict = ["abba", "bb", "cc"];
707 let t = "abba";
708 res = aho_corasick(&dict, t);
709
710 m = BTreeMap::new();
711 m.insert(0, vec![0]);
712 m.insert(1, vec![1]);
713 assert_eq!(m, res);
714
715 dict = ["aba", "baba", "cc"];
716 let t = "ababababa";
717 res = aho_corasick(&dict, t);
718
719 m = BTreeMap::new();
720 m.insert(0, vec![0, 2, 4, 6]);
721 m.insert(1, vec![1, 3, 5]);
722 assert_eq!(m, res);
723
724}