1use crate::error::{CoreError, CoreResult, ErrorContext};
29
/// Computes the Levenshtein edit distance between `a` and `b`,
/// counted in Unicode scalar values (`char`s), using a single-row DP.
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // Distance to or from an empty string is the other string's length.
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // row[j] holds the distance between the processed prefix of `source`
    // and target[..j]; initialised for the empty source prefix.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, &sc) in source.iter().enumerate() {
        // `diag` carries the previous row's value at column j - 1.
        let mut diag = row[0];
        row[0] = i + 1;
        for (j, &tc) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = if sc == tc { diag } else { diag + 1 };
            row[j + 1] = substitution.min(above + 1).min(row[j] + 1);
            diag = above;
        }
    }

    row[target.len()]
}
85
86pub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
90 let max_len = a.chars().count().max(b.chars().count());
91 if max_len == 0 {
92 return 0.0;
93 }
94 levenshtein_distance(a, b) as f64 / max_len as f64
95}
96
97pub fn levenshtein_similarity(a: &str, b: &str) -> f64 {
99 1.0 - normalized_levenshtein(a, b)
100}
101
102pub fn hamming_distance(a: &str, b: &str) -> CoreResult<usize> {
122 let a_chars: Vec<char> = a.chars().collect();
123 let b_chars: Vec<char> = b.chars().collect();
124 if a_chars.len() != b_chars.len() {
125 return Err(CoreError::ValueError(ErrorContext::new(format!(
126 "Hamming distance requires equal-length strings (got {} and {})",
127 a_chars.len(),
128 b_chars.len()
129 ))));
130 }
131 let dist = a_chars
132 .iter()
133 .zip(b_chars.iter())
134 .filter(|(x, y)| x != y)
135 .count();
136 Ok(dist)
137}
138
/// Jaro similarity in [0, 1]. Characters match when equal and within
/// the standard search window of max(len)/2 - 1 positions; the score
/// combines match ratios with a transposition penalty.
pub fn jaro_similarity(a: &str, b: &str) -> f64 {
    let s: Vec<char> = a.chars().collect();
    let t: Vec<char> = b.chars().collect();
    let (sl, tl) = (s.len(), t.len());

    // Edge cases: both empty → identical; exactly one empty → disjoint.
    match (sl, tl) {
        (0, 0) => return 1.0,
        (0, _) | (_, 0) => return 0.0,
        _ => {}
    }

    let window = (sl.max(tl) / 2).saturating_sub(1);

    let mut s_hit = vec![false; sl];
    let mut t_hit = vec![false; tl];
    let mut matched = 0usize;

    // Greedily pair each char of `s` with the first unclaimed equal
    // char of `t` inside the window.
    for (i, &sc) in s.iter().enumerate() {
        let lo = i.saturating_sub(window);
        let hi = (i + window + 1).min(tl);
        for j in lo..hi {
            if !t_hit[j] && sc == t[j] {
                s_hit[i] = true;
                t_hit[j] = true;
                matched += 1;
                break;
            }
        }
    }

    if matched == 0 {
        return 0.0;
    }

    // Walk both matched sequences in order; each out-of-order pair
    // contributes half a transposition.
    let mut half_transpositions = 0usize;
    let mut j = 0usize;
    for i in 0..sl {
        if !s_hit[i] {
            continue;
        }
        while !t_hit[j] {
            j += 1;
        }
        if s[i] != t[j] {
            half_transpositions += 1;
        }
        j += 1;
    }

    let m = matched as f64;
    (m / sl as f64 + m / tl as f64 + (m - half_transpositions as f64 / 2.0) / m) / 3.0
}
212
213pub fn jaro_winkler_similarity(a: &str, b: &str, prefix_weight: f64) -> f64 {
227 let jaro = jaro_similarity(a, b);
228 let a_chars: Vec<char> = a.chars().collect();
229 let b_chars: Vec<char> = b.chars().collect();
230
231 let max_prefix = 4.min(a_chars.len()).min(b_chars.len());
233 let mut prefix_len = 0;
234 for i in 0..max_prefix {
235 if a_chars[i] == b_chars[i] {
236 prefix_len += 1;
237 } else {
238 break;
239 }
240 }
241
242 let weight = prefix_weight.min(0.25); jaro + prefix_len as f64 * weight * (1.0 - jaro)
244}
245
/// Length of the longest common subsequence of `a` and `b`, computed
/// over chars with a two-row DP (O(min-memory) rolling rows).
///
/// Fixes vs. the previous version: the per-row clearing of `curr` was
/// unnecessary work — every cell `curr[j]` for j >= 1 is overwritten
/// before it is read (it depends only on `prev` and the already-updated
/// `curr[j - 1]`), and index 0 of both rows is invariantly 0. The
/// answer is also read directly from `prev[n]` instead of scanning for
/// the maximum; LCS rows are non-decreasing, so the last cell is the
/// maximum.
pub fn longest_common_subsequence(a: &str, b: &str) -> usize {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();
    let m = a_chars.len();
    let n = b_chars.len();

    if m == 0 || n == 0 {
        return 0;
    }

    // prev = DP row for a[..i-1], curr = row being built for a[..i].
    let mut prev = vec![0usize; n + 1];
    let mut curr = vec![0usize; n + 1];

    for i in 1..=m {
        for j in 1..=n {
            curr[j] = if a_chars[i - 1] == b_chars[j - 1] {
                prev[j - 1] + 1
            } else {
                prev[j].max(curr[j - 1])
            };
        }
        std::mem::swap(&mut prev, &mut curr);
    }

    prev[n]
}
290
/// Returns one longest common subsequence of `a` and `b` as a `String`
/// (full DP table plus backtracking from the bottom-right corner).
pub fn lcs_string(a: &str, b: &str) -> String {
    let x: Vec<char> = a.chars().collect();
    let y: Vec<char> = b.chars().collect();
    let (m, n) = (x.len(), y.len());

    if m == 0 || n == 0 {
        return String::new();
    }

    // dp[i][j] = LCS length of x[..i] and y[..j].
    let mut dp = vec![vec![0usize; n + 1]; m + 1];
    for (i, &xc) in x.iter().enumerate() {
        for (j, &yc) in y.iter().enumerate() {
            dp[i + 1][j + 1] = if xc == yc {
                dp[i][j] + 1
            } else {
                dp[i][j + 1].max(dp[i + 1][j])
            };
        }
    }

    // Backtrack, collecting characters in reverse order.
    let mut collected: Vec<char> = Vec::with_capacity(dp[m][n]);
    let (mut i, mut j) = (m, n);
    while i > 0 && j > 0 {
        if x[i - 1] == y[j - 1] {
            collected.push(x[i - 1]);
            i -= 1;
            j -= 1;
        } else if dp[i - 1][j] > dp[i][j - 1] {
            i -= 1;
        } else {
            j -= 1;
        }
    }

    collected.iter().rev().collect()
}
341
342pub fn lcs_similarity(a: &str, b: &str) -> f64 {
344 let max_len = a.chars().count().max(b.chars().count());
345 if max_len == 0 {
346 return 1.0;
347 }
348 longest_common_subsequence(a, b) as f64 / max_len as f64
349}
350
351pub fn ngrams<'a>(tokens: &[&'a str], n: usize) -> CoreResult<Vec<Vec<&'a str>>> {
368 if n == 0 {
369 return Err(CoreError::ValueError(ErrorContext::new(
370 "n must be >= 1 for n-gram generation",
371 )));
372 }
373 if tokens.len() < n {
374 return Ok(Vec::new());
375 }
376 let result: Vec<Vec<&str>> = tokens.windows(n).map(|w| w.to_vec()).collect();
377 Ok(result)
378}
379
380pub fn char_ngrams(text: &str, n: usize) -> CoreResult<Vec<String>> {
391 if n == 0 {
392 return Err(CoreError::ValueError(ErrorContext::new(
393 "n must be >= 1 for char n-gram generation",
394 )));
395 }
396 let chars: Vec<char> = text.chars().collect();
397 if chars.len() < n {
398 return Ok(Vec::new());
399 }
400 let result: Vec<String> = chars.windows(n).map(|w| w.iter().collect()).collect();
401 Ok(result)
402}
403
/// All ordered token pairs (head, tail) where tail follows head by at
/// most `skip + 1` positions (skip = 0 gives plain adjacent bigrams).
pub fn skip_bigrams<'a>(tokens: &[&'a str], skip: usize) -> Vec<(&'a str, &'a str)> {
    let mut pairs = Vec::new();
    for (i, &head) in tokens.iter().enumerate() {
        // Partners are the next `skip + 1` tokens after position i.
        for &tail in tokens.iter().skip(i + 1).take(skip + 1) {
            pairs.push((head, tail));
        }
    }
    pairs
}
419
/// Splits `text` on runs of Unicode whitespace, dropping empty pieces.
pub fn tokenize_whitespace(text: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    tokens.extend(text.split_whitespace());
    tokens
}
437
/// Splits `text` on every occurrence of `delimiter`; empty pieces
/// between consecutive delimiters are preserved.
pub fn tokenize_char(text: &str, delimiter: char) -> Vec<&str> {
    let parts: Vec<&str> = text.split(delimiter).collect();
    parts
}
451
/// Splits `text` wherever `predicate` returns true for a char; empty
/// pieces are preserved. The closure itself acts as the split pattern.
pub fn tokenize_predicate<F: Fn(char) -> bool>(text: &str, predicate: F) -> Vec<&str> {
    text.split(predicate).collect()
}
458
/// Splits `text` on every occurrence of the literal `pattern`; empty
/// pieces are preserved.
pub fn tokenize_pattern<'a>(text: &'a str, pattern: &str) -> Vec<&'a str> {
    Vec::from_iter(text.split(pattern))
}
463
/// Splits `text` into sentences at '.', '!' or '?', trimming each
/// piece and dropping empties (e.g. after the final terminator).
pub fn tokenize_sentences(text: &str) -> Vec<String> {
    let mut sentences = Vec::new();
    for piece in text.split(['.', '!', '?']) {
        let trimmed = piece.trim();
        if !trimmed.is_empty() {
            sentences.push(trimmed.to_string());
        }
    }
    sentences
}
473
/// Extracts maximal runs of "word" characters (alphanumerics plus
/// '_') as zero-copy slices of `text`; everything else separates
/// words. Splitting on non-word chars and filtering out the empty
/// pieces yields exactly the maximal word runs.
pub fn tokenize_words(text: &str) -> Vec<&str> {
    text.split(|c: char| !(c.is_alphanumeric() || c == '_'))
        .filter(|piece| !piece.is_empty())
        .collect()
}
497
498pub fn to_snake_case(s: &str) -> String {
514 let words = split_into_words(s);
515 words
516 .iter()
517 .map(|w| w.to_lowercase())
518 .collect::<Vec<_>>()
519 .join("_")
520}
521
522pub fn to_camel_case(s: &str) -> String {
533 let words = split_into_words(s);
534 let mut result = String::new();
535 for (i, word) in words.iter().enumerate() {
536 if i == 0 {
537 result.push_str(&word.to_lowercase());
538 } else {
539 let mut chars = word.chars();
540 if let Some(first) = chars.next() {
541 result.extend(first.to_uppercase());
542 result.push_str(&chars.as_str().to_lowercase());
543 }
544 }
545 }
546 result
547}
548
549pub fn to_pascal_case(s: &str) -> String {
560 let words = split_into_words(s);
561 let mut result = String::new();
562 for word in &words {
563 let mut chars = word.chars();
564 if let Some(first) = chars.next() {
565 result.extend(first.to_uppercase());
566 result.push_str(&chars.as_str().to_lowercase());
567 }
568 }
569 result
570}
571
572pub fn to_kebab_case(s: &str) -> String {
583 let words = split_into_words(s);
584 words
585 .iter()
586 .map(|w| w.to_lowercase())
587 .collect::<Vec<_>>()
588 .join("-")
589}
590
591pub fn to_screaming_snake_case(s: &str) -> String {
602 let words = split_into_words(s);
603 words
604 .iter()
605 .map(|w| w.to_uppercase())
606 .collect::<Vec<_>>()
607 .join("_")
608}
609
610pub fn to_title_case(s: &str) -> String {
612 let words = split_into_words(s);
613 let mut result = Vec::with_capacity(words.len());
614 for word in &words {
615 let mut chars = word.chars();
616 if let Some(first) = chars.next() {
617 let mut titled = String::new();
618 titled.extend(first.to_uppercase());
619 titled.push_str(&chars.as_str().to_lowercase());
620 result.push(titled);
621 }
622 }
623 result.join(" ")
624}
625
/// Splits an identifier into its component words, handling explicit
/// separators ('_', '-', ' ', '.') and camelCase / PascalCase /
/// ACRONYMCase boundaries (e.g. "HTTPClient" -> ["HTTP", "Client"]).
///
/// Fixes vs. the previous version: the inner `else if` branch carried
/// a redundant `!current.is_empty()` check (it was already inside an
/// `if !current.is_empty()` guard), both arms of the uppercase check
/// ended in an identical `current.push(c)`, and clone-then-clear is
/// replaced by `std::mem::take`, which moves the buffer out without
/// an extra allocation. Behavior is unchanged.
fn split_into_words(s: &str) -> Vec<String> {
    let mut words = Vec::new();
    let mut current = String::new();
    let chars: Vec<char> = s.chars().collect();
    let len = chars.len();

    for i in 0..len {
        let c = chars[i];

        // Explicit separators always end the current word.
        if matches!(c, '_' | '-' | ' ' | '.') {
            if !current.is_empty() {
                words.push(std::mem::take(&mut current));
            }
            continue;
        }

        // An uppercase letter starts a new word when it follows a
        // lowercase letter or digit ("fooBar"), or when it ends an
        // acronym run because the next char is lowercase ("HTTPClient").
        if c.is_uppercase() && !current.is_empty() {
            // Safe: a non-empty `current` implies i > 0.
            let prev = chars[i - 1];
            let boundary = prev.is_lowercase()
                || prev.is_ascii_digit()
                || (i + 1 < len && chars[i + 1].is_lowercase());
            if boundary {
                words.push(std::mem::take(&mut current));
            }
        }

        current.push(c);
    }

    if !current.is_empty() {
        words.push(current);
    }

    words
}
679
680pub fn dice_coefficient(a: &str, b: &str) -> f64 {
688 let a_bigrams: Vec<String> = char_ngrams(a, 2).unwrap_or_default();
689 let b_bigrams: Vec<String> = char_ngrams(b, 2).unwrap_or_default();
690
691 if a_bigrams.is_empty() && b_bigrams.is_empty() {
692 return 1.0;
693 }
694 if a_bigrams.is_empty() || b_bigrams.is_empty() {
695 return 0.0;
696 }
697
698 let mut intersection = 0usize;
699 let mut b_used = vec![false; b_bigrams.len()];
700
701 for bg_a in &a_bigrams {
702 for (j, bg_b) in b_bigrams.iter().enumerate() {
703 if !b_used[j] && bg_a == bg_b {
704 intersection += 1;
705 b_used[j] = true;
706 break;
707 }
708 }
709 }
710
711 2.0 * intersection as f64 / (a_bigrams.len() + b_bigrams.len()) as f64
712}
713
/// Left-pads `s` with `fill` chars up to `width` (measured in chars).
/// Strings already at least `width` long are returned unchanged.
pub fn pad_left(s: &str, width: usize, fill: char) -> String {
    let len = s.chars().count();
    if len >= width {
        return s.to_string();
    }
    let mut out = String::with_capacity(width);
    for _ in 0..width - len {
        out.push(fill);
    }
    out.push_str(s);
    out
}
723
/// Right-pads `s` with `fill` chars up to `width` (measured in chars).
/// Strings already at least `width` long are returned unchanged.
pub fn pad_right(s: &str, width: usize, fill: char) -> String {
    let len = s.chars().count();
    if len >= width {
        return s.to_string();
    }
    let mut out = String::with_capacity(width);
    out.push_str(s);
    for _ in 0..width - len {
        out.push(fill);
    }
    out
}
733
/// Centers `s` within `width` chars, padding both sides with `fill`.
/// When the padding is odd, the extra char goes on the right side.
pub fn center(s: &str, width: usize, fill: char) -> String {
    let len = s.chars().count();
    if len >= width {
        return s.to_string();
    }
    let left_pad = (width - len) / 2;
    let right_pad = width - len - left_pad;

    let mut out = String::with_capacity(width);
    for _ in 0..left_pad {
        out.push(fill);
    }
    out.push_str(s);
    for _ in 0..right_pad {
        out.push(fill);
    }
    out
}
747
/// Reverses `s` by Unicode scalar value (chars, not grapheme clusters).
pub fn reverse(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars().rev() {
        out.push(c);
    }
    out
}
752
/// Checks whether `s` reads the same forwards and backwards after
/// dropping non-alphanumeric chars and lowercasing (first lowercase
/// mapping of each char). Empty input counts as a palindrome.
pub fn is_palindrome(s: &str) -> bool {
    let normalized: Vec<char> = s
        .chars()
        .filter(|c| c.is_alphanumeric())
        .map(|c| c.to_lowercase().next().unwrap_or(c))
        .collect();

    // A sequence is a palindrome iff it equals its own reverse.
    normalized.iter().eq(normalized.iter().rev())
}
771
/// Counts non-overlapping occurrences of `pattern` in `text`.
/// An empty pattern yields 0 rather than an infinite/degenerate count.
pub fn count_occurrences(text: &str, pattern: &str) -> usize {
    if pattern.is_empty() {
        0
    } else {
        text.matches(pattern).count()
    }
}
779
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Levenshtein ----

    #[test]
    fn test_levenshtein_identical() {
        assert_eq!(levenshtein_distance("hello", "hello"), 0);
    }

    #[test]
    fn test_levenshtein_empty() {
        // Distance to/from the empty string is the other string's length.
        assert_eq!(levenshtein_distance("", "abc"), 3);
        assert_eq!(levenshtein_distance("xyz", ""), 3);
        assert_eq!(levenshtein_distance("", ""), 0);
    }

    #[test]
    fn test_levenshtein_known() {
        // Classic textbook pairs with well-known distances.
        assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
        assert_eq!(levenshtein_distance("saturday", "sunday"), 3);
    }

    #[test]
    fn test_normalized_levenshtein() {
        assert!((normalized_levenshtein("abc", "abc") - 0.0).abs() < 1e-10);
        // Both empty: defined as 0.0 distance.
        assert!((normalized_levenshtein("", "") - 0.0).abs() < 1e-10);
    }

    #[test]
    fn test_levenshtein_similarity() {
        assert!((levenshtein_similarity("abc", "abc") - 1.0).abs() < 1e-10);
    }

    // ---- Hamming ----

    #[test]
    fn test_hamming_basic() {
        assert_eq!(hamming_distance("karolin", "kathrin").expect("ok"), 3);
        assert_eq!(hamming_distance("abc", "abc").expect("ok"), 0);
    }

    #[test]
    fn test_hamming_unequal_length() {
        // Hamming distance is undefined for unequal lengths → error.
        assert!(hamming_distance("ab", "abc").is_err());
    }

    // ---- Jaro / Jaro-Winkler ----

    #[test]
    fn test_jaro_identical() {
        assert!((jaro_similarity("abc", "abc") - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_jaro_empty() {
        // Both empty → identical; one empty → no similarity.
        assert!((jaro_similarity("", "") - 1.0).abs() < 1e-10);
        assert!((jaro_similarity("abc", "") - 0.0).abs() < 1e-10);
    }

    #[test]
    fn test_jaro_known() {
        // Canonical example: jaro("martha", "marhta") = 17/18 ≈ 0.9444.
        let sim = jaro_similarity("martha", "marhta");
        assert!((sim - 0.9444).abs() < 0.001, "jaro: {}", sim);
    }

    #[test]
    fn test_jaro_winkler() {
        let sim = jaro_winkler_similarity("martha", "marhta", 0.1);
        assert!(sim > 0.94, "jw: {}", sim);
        // The Winkler prefix boost can never lower the Jaro score.
        assert!(sim >= jaro_similarity("martha", "marhta"));
    }

    #[test]
    fn test_jaro_winkler_different() {
        let sim = jaro_winkler_similarity("abc", "xyz", 0.1);
        assert!(sim < 0.1);
    }

    // ---- Longest common subsequence ----

    #[test]
    fn test_lcs_length() {
        assert_eq!(longest_common_subsequence("abcde", "ace"), 3);
        assert_eq!(longest_common_subsequence("abc", "def"), 0);
        assert_eq!(longest_common_subsequence("", "abc"), 0);
    }

    #[test]
    fn test_lcs_string() {
        assert_eq!(lcs_string("abcde", "ace"), "ace");
        assert_eq!(lcs_string("abc", "def"), "");
    }

    #[test]
    fn test_lcs_similarity() {
        assert!((lcs_similarity("abc", "abc") - 1.0).abs() < 1e-10);
        // Both empty strings count as fully similar.
        assert!((lcs_similarity("", "") - 1.0).abs() < 1e-10);
    }

    // ---- N-grams ----

    #[test]
    fn test_word_ngrams() {
        let tokens = vec!["a", "b", "c", "d"];
        let bigrams = ngrams(&tokens, 2).expect("bigrams");
        assert_eq!(bigrams.len(), 3);
        assert_eq!(bigrams[0], vec!["a", "b"]);
        assert_eq!(bigrams[2], vec!["c", "d"]);
    }

    #[test]
    fn test_word_trigrams() {
        let tokens = vec!["a", "b", "c", "d"];
        let trigrams = ngrams(&tokens, 3).expect("trigrams");
        assert_eq!(trigrams.len(), 2);
    }

    #[test]
    fn test_ngrams_zero() {
        // n == 0 is rejected with an error.
        assert!(ngrams(&["a"], 0).is_err());
    }

    #[test]
    fn test_ngrams_too_short() {
        // Input shorter than n yields an empty result, not an error.
        let tokens = vec!["a"];
        let result = ngrams(&tokens, 3).expect("ok");
        assert!(result.is_empty());
    }

    #[test]
    fn test_char_ngrams() {
        let grams = char_ngrams("hello", 2).expect("ok");
        assert_eq!(grams, vec!["he", "el", "ll", "lo"]);
    }

    #[test]
    fn test_char_ngrams_zero() {
        assert!(char_ngrams("hello", 0).is_err());
    }

    #[test]
    fn test_skip_bigrams() {
        // skip = 1 pairs each token with the next two tokens.
        let tokens = vec!["a", "b", "c"];
        let result = skip_bigrams(&tokens, 1);
        assert_eq!(result.len(), 3);
        assert!(result.contains(&("a", "b")));
        assert!(result.contains(&("a", "c")));
        assert!(result.contains(&("b", "c")));
    }

    // ---- Tokenization ----

    #[test]
    fn test_tokenize_whitespace() {
        let tokens = tokenize_whitespace("  hello   world  ");
        assert_eq!(tokens, vec!["hello", "world"]);
    }

    #[test]
    fn test_tokenize_char() {
        // Consecutive delimiters produce empty tokens.
        let tokens = tokenize_char("a,b,,c", ',');
        assert_eq!(tokens, vec!["a", "b", "", "c"]);
    }

    #[test]
    fn test_tokenize_pattern() {
        let tokens = tokenize_pattern("a::b::c", "::");
        assert_eq!(tokens, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_tokenize_sentences() {
        let sentences = tokenize_sentences("Hello world. How are you? Fine!");
        assert_eq!(sentences.len(), 3);
        assert_eq!(sentences[0], "Hello world");
    }

    #[test]
    fn test_tokenize_words() {
        // Apostrophes split words ("It's" → "It", "s"); digits are kept.
        let tokens = tokenize_words("Hello, world! It's 2026.");
        assert_eq!(tokens, vec!["Hello", "world", "It", "s", "2026"]);
    }

    // ---- Case conversion ----

    #[test]
    fn test_to_snake_case() {
        assert_eq!(to_snake_case("helloWorld"), "hello_world");
        // Acronym runs are treated as a single word.
        assert_eq!(to_snake_case("HTTPClient"), "http_client");
        assert_eq!(to_snake_case("already_snake"), "already_snake");
        assert_eq!(to_snake_case("SomeClassName"), "some_class_name");
    }

    #[test]
    fn test_to_camel_case() {
        assert_eq!(to_camel_case("hello_world"), "helloWorld");
        assert_eq!(to_camel_case("some-kebab-case"), "someKebabCase");
        assert_eq!(to_camel_case("SCREAMING_SNAKE"), "screamingSnake");
    }

    #[test]
    fn test_to_pascal_case() {
        assert_eq!(to_pascal_case("hello_world"), "HelloWorld");
        assert_eq!(to_pascal_case("some-kebab"), "SomeKebab");
    }

    #[test]
    fn test_to_kebab_case() {
        assert_eq!(to_kebab_case("helloWorld"), "hello-world");
        assert_eq!(to_kebab_case("SomeClassName"), "some-class-name");
    }

    #[test]
    fn test_to_screaming_snake_case() {
        assert_eq!(to_screaming_snake_case("helloWorld"), "HELLO_WORLD");
        assert_eq!(to_screaming_snake_case("some-kebab"), "SOME_KEBAB");
    }

    #[test]
    fn test_to_title_case() {
        assert_eq!(to_title_case("hello_world"), "Hello World");
        assert_eq!(to_title_case("helloWorld"), "Hello World");
    }

    #[test]
    fn test_split_into_words() {
        let words = split_into_words("helloWorldFoo");
        assert_eq!(words, vec!["hello", "World", "Foo"]);

        let words2 = split_into_words("snake_case_var");
        assert_eq!(words2, vec!["snake", "case", "var"]);

        let words3 = split_into_words("kebab-case-var");
        assert_eq!(words3, vec!["kebab", "case", "var"]);
    }

    // ---- Dice coefficient ----

    #[test]
    fn test_dice_coefficient_identical() {
        assert!((dice_coefficient("night", "night") - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_dice_coefficient_different() {
        // No shared bigrams → coefficient is 0.
        let d = dice_coefficient("abc", "xyz");
        assert!(d < 0.01);
    }

    // ---- Padding ----

    #[test]
    fn test_pad_left() {
        assert_eq!(pad_left("42", 5, '0'), "00042");
        // Already wider than the target width → returned unchanged.
        assert_eq!(pad_left("hello", 3, ' '), "hello");
    }

    #[test]
    fn test_pad_right() {
        assert_eq!(pad_right("hi", 5, '.'), "hi...");
    }

    #[test]
    fn test_center() {
        assert_eq!(center("hi", 6, '-'), "--hi--");
        // Odd padding: the extra fill char goes on the right.
        assert_eq!(center("hi", 7, '-'), "--hi---");
    }

    // ---- Misc string helpers ----

    #[test]
    fn test_reverse() {
        assert_eq!(reverse("hello"), "olleh");
        assert_eq!(reverse(""), "");
    }

    #[test]
    fn test_is_palindrome() {
        assert!(is_palindrome("racecar"));
        // Case and non-alphanumerics are ignored.
        assert!(is_palindrome("A man a plan a canal Panama"));
        assert!(!is_palindrome("hello"));
        assert!(is_palindrome(""));
    }

    #[test]
    fn test_count_occurrences() {
        assert_eq!(count_occurrences("abababab", "ab"), 4);
        assert_eq!(count_occurrences("hello", "xyz"), 0);
        // Empty pattern is defined to match zero times.
        assert_eq!(count_occurrences("hello", ""), 0);
    }

    // ---- Unicode-safety smoke tests ----

    #[test]
    fn test_levenshtein_unicode() {
        assert_eq!(levenshtein_distance("cafe", "cafe"), 0);
        assert_eq!(levenshtein_distance("abc", "abc"), 0);
    }

    #[test]
    fn test_char_ngrams_unicode() {
        let grams = char_ngrams("ab", 2).expect("ok");
        assert_eq!(grams.len(), 1);
        assert_eq!(grams[0], "ab");
    }
}