1use crate::Result;
4use std::cmp::{max, min};
5use std::collections::HashMap;
6
7pub trait StringMetric {
9 fn distance(&self, s1: &str, s2: &str) -> Result<f64>;
11
12 fn normalized_distance(&self, s1: &str, s2: &str) -> Result<f64> {
14 let dist = self.distance(s1, s2)?;
15 let max_len = max(s1.len(), s2.len()) as f64;
16 Ok(if max_len > 0.0 { dist / max_len } else { 0.0 })
17 }
18
19 fn similarity(&self, s1: &str, s2: &str) -> Result<f64> {
21 Ok(1.0 - self.normalized_distance(s1, s2)?)
22 }
23}
24
25#[derive(Debug, Clone)]
27pub struct DamerauLevenshteinMetric {
28 restricted: bool,
30}
31
32impl DamerauLevenshteinMetric {
33 pub fn new() -> Self {
35 Self { restricted: false }
36 }
37
38 pub fn restricted() -> Self {
40 Self { restricted: true }
41 }
42}
43
44impl Default for DamerauLevenshteinMetric {
45 fn default() -> Self {
46 Self::new()
47 }
48}
49
50impl StringMetric for DamerauLevenshteinMetric {
51 fn distance(&self, s1: &str, s2: &str) -> Result<f64> {
52 if self.restricted {
53 Ok(osa_distance(s1, s2) as f64)
54 } else {
55 Ok(damerau_levenshtein_distance(s1, s2) as f64)
56 }
57 }
58}
59
60#[allow(dead_code)]
62fn osa_distance(s1: &str, s2: &str) -> usize {
63 let a: Vec<char> = s1.chars().collect();
64 let b: Vec<char> = s2.chars().collect();
65 let len_a = a.len();
66 let len_b = b.len();
67
68 if len_a == 0 {
69 return len_b;
70 }
71 if len_b == 0 {
72 return len_a;
73 }
74
75 let mut matrix = vec![vec![0; len_b + 1]; len_a + 1];
76
77 for (i, row) in matrix.iter_mut().enumerate().take(len_a + 1) {
78 row[0] = i;
79 }
80 for j in 0..=len_b {
82 matrix[0][j] = j;
83 }
84
85 for i in 1..=len_a {
86 for j in 1..=len_b {
87 let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 };
88
89 matrix[i][j] = min(
90 min(
91 matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, ),
94 matrix[i - 1][j - 1] + cost, );
96
97 if i > 1 && j > 1 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1] {
99 matrix[i][j] = min(matrix[i][j], matrix[i - 2][j - 2] + cost);
100 }
101 }
102 }
103
104 matrix[len_a][len_b]
105}
106
107#[allow(dead_code)]
109fn damerau_levenshtein_distance(s1: &str, s2: &str) -> usize {
110 let a: Vec<char> = s1.chars().collect();
111 let b: Vec<char> = s2.chars().collect();
112 let len_a = a.len();
113 let len_b = b.len();
114
115 if len_a == 0 {
116 return len_b;
117 }
118 if len_b == 0 {
119 return len_a;
120 }
121
122 let max_dist = len_a + len_b;
123 let mut h = vec![vec![max_dist; len_b + 2]; len_a + 2];
124 let mut da: HashMap<char, usize> = HashMap::new();
125
126 for i in 0..=len_a {
127 h[i + 1][0] = max_dist;
128 h[i + 1][1] = i;
129 }
130 for j in 0..=len_b {
131 h[0][j + 1] = max_dist;
132 h[1][j + 1] = j;
133 }
134
135 for i in 1..=len_a {
136 let mut db = 0;
137 for j in 1..=len_b {
138 let k = da.get(&b[j - 1]).copied().unwrap_or(0);
139 let l = db;
140 let cost = if a[i - 1] == b[j - 1] {
141 db = j;
142 0
143 } else {
144 1
145 };
146
147 h[i + 1][j + 1] = min(
148 min(
149 h[i][j] + cost, h[i + 1][j] + 1, ),
152 min(
153 h[i][j + 1] + 1, h[k][l] + (i - k - 1) + 1 + (j - l - 1), ),
156 );
157 }
158
159 da.insert(a[i - 1], i);
160 }
161
162 h[len_a + 1][len_b + 1]
163}
164
165pub trait PhoneticAlgorithm {
167 fn encode(&self, text: &str) -> Result<String>;
169
170 fn sounds_like(&self, s1: &str, s2: &str) -> Result<bool> {
172 Ok(self.encode(s1)? == self.encode(s2)?)
173 }
174}
175
176#[derive(Debug, Clone)]
178pub struct Soundex {
179 length: usize,
181}
182
183impl Soundex {
184 pub fn new() -> Self {
186 Self { length: 4 }
187 }
188
189 pub fn with_length(length: usize) -> Self {
191 Self { length }
192 }
193}
194
195impl Default for Soundex {
196 fn default() -> Self {
197 Self::new()
198 }
199}
200
201impl PhoneticAlgorithm for Soundex {
202 fn encode(&self, text: &str) -> Result<String> {
203 if text.is_empty() {
204 return Ok(String::new());
205 }
206
207 let text = text.to_uppercase();
208 let chars: Vec<char> = text.chars().filter(|c| c.is_alphabetic()).collect();
209
210 if chars.is_empty() {
211 return Ok(String::new());
212 }
213
214 let mut code = String::new();
215 code.push(chars[0]);
216
217 let mut last_code = encode_char(chars[0]);
218
219 for &ch in &chars[1..] {
220 let ch_code = encode_char(ch);
221 if ch_code != '0' && ch_code != last_code {
222 code.push(ch_code);
223 last_code = ch_code;
224 }
225
226 if code.len() >= self.length {
227 break;
228 }
229 }
230
231 while code.len() < self.length {
233 code.push('0');
234 }
235
236 Ok(code)
237 }
238}
239
240#[allow(dead_code)]
242fn encode_char(ch: char) -> char {
243 match ch.to_ascii_uppercase() {
244 'B' | 'F' | 'P' | 'V' => '1',
245 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => '2',
246 'D' | 'T' => '3',
247 'L' => '4',
248 'M' | 'N' => '5',
249 'R' => '6',
250 _ => '0',
251 }
252}
253
254#[derive(Debug, Clone)]
256pub struct Metaphone {
257 max_length: usize,
259}
260
261#[derive(Debug, Clone)]
263pub struct Nysiis {
264 max_length: usize,
266}
267
268#[derive(Debug, Clone)]
270pub struct NeedlemanWunsch {
271 match_score: i32,
273 mismatch_penalty: i32,
275 gap_penalty: i32,
277}
278
279#[derive(Debug, Clone, PartialEq)]
281pub struct AlignmentResult {
282 pub aligned_seq1: String,
284 pub aligned_seq2: String,
286 pub score: i32,
288}
289
290#[derive(Debug, Clone)]
292pub struct SmithWaterman {
293 match_score: i32,
295 mismatch_penalty: i32,
297 gap_penalty: i32,
299}
300
301impl Metaphone {
302 pub fn new() -> Self {
304 Self { max_length: 6 }
305 }
306
307 pub fn with_max_length(_maxlength: usize) -> Self {
309 Self {
310 max_length: _maxlength,
311 }
312 }
313}
314
315impl Default for Metaphone {
316 fn default() -> Self {
317 Self::new()
318 }
319}
320
321impl PhoneticAlgorithm for Metaphone {
322 fn encode(&self, text: &str) -> Result<String> {
323 if text.is_empty() {
324 return Ok(String::new());
325 }
326
327 let text = text.to_uppercase();
328 let chars: Vec<char> = text.chars().filter(|c| c.is_alphabetic()).collect();
329
330 if chars.is_empty() {
331 return Ok(String::new());
332 }
333
334 let mut result = String::new();
335 let mut i = 0;
336
337 if chars.len() >= 2 {
339 match (chars[0], chars[1]) {
340 ('A', 'E') | ('G', 'N') | ('K', 'N') | ('P', 'N') | ('W', 'R') => i = 1,
341 ('W', 'H') => {
342 result.push('W');
343 i = 2;
344 }
345 _ => {}
346 }
347 }
348
349 while i < chars.len() && result.len() < self.max_length {
350 let ch = chars[i];
351 let next = chars.get(i + 1).copied();
352 let prev = if i > 0 {
353 chars.get(i - 1).copied()
354 } else {
355 None
356 };
357
358 match ch {
359 'A' | 'E' | 'I' | 'O' | 'U' if i == 0 => {
360 result.push(ch);
361 }
362 'B' if !result.ends_with('B') => {
363 result.push('B');
364 }
365 'C' => {
366 if next == Some('H') {
367 result.push('X');
368 i += 1;
369 } else if next == Some('I') || next == Some('E') || next == Some('Y') {
370 result.push('S');
371 } else {
372 result.push('K');
373 }
374 }
375 'D' => {
376 if next == Some('G') && chars.get(i + 2) == Some(&'E') {
377 result.push('J');
378 i += 2;
379 } else {
380 result.push('T');
381 }
382 }
383 'F' | 'J' | 'L' | 'M' | 'N' | 'R' if !result.ends_with(ch) => {
384 result.push(ch);
385 }
386 'G' => {
387 if next == Some('H') {
388 i += 1;
389 } else if next == Some('N') {
390 result.push('N');
391 i += 1;
392 } else {
393 result.push('K');
394 }
395 }
396 'H' if prev != Some('C')
397 && prev != Some('S')
398 && prev != Some('P')
399 && prev != Some('T')
400 && prev != Some('G') =>
401 {
402 result.push('H');
403 }
404 'K' if prev != Some('C') => {
405 result.push('K');
406 }
407 'P' => {
408 if next == Some('H') {
409 result.push('F');
410 i += 1;
411 } else {
412 result.push('P');
413 }
414 }
415 'Q' => result.push('K'),
416 'S' => {
417 if next == Some('H') {
418 result.push('X');
419 i += 1;
420 } else if next == Some('I')
421 && (chars.get(i + 2) == Some(&'A') || chars.get(i + 2) == Some(&'O'))
422 {
423 result.push('X');
424 } else {
425 result.push('S');
426 }
427 }
428 'T' => {
429 if next == Some('H') {
430 result.push('0');
431 i += 1;
432 } else if next != Some('C') || chars.get(i + 2) != Some(&'H') {
433 result.push('T');
434 }
435 }
436 'V' => result.push('F'),
437 'W' | 'Y' if next.map(|c| "AEIOU".contains(c)).unwrap_or(false) => {
438 result.push(ch);
439 }
440 'X' => {
441 result.push('K');
442 result.push('S');
443 }
444 'Z' => result.push('S'),
445 _ => {}
446 }
447
448 i += 1;
449 }
450
451 Ok(result)
452 }
453}
454
455impl Nysiis {
456 pub fn new() -> Self {
458 Self { max_length: 0 }
459 }
460
461 pub fn with_max_length(_maxlength: usize) -> Self {
463 Self {
464 max_length: _maxlength,
465 }
466 }
467}
468
469impl Default for Nysiis {
470 fn default() -> Self {
471 Self::new()
472 }
473}
474
475impl PhoneticAlgorithm for Nysiis {
476 fn encode(&self, text: &str) -> Result<String> {
477 if text.is_empty() {
478 return Ok(String::new());
479 }
480
481 let mut word: Vec<char> = text
483 .to_uppercase()
484 .chars()
485 .filter(|c| c.is_alphabetic())
486 .collect();
487
488 if word.is_empty() {
489 return Ok(String::new());
490 }
491
492 if word.len() >= 3 {
494 let start3 = &word[0..3].iter().collect::<String>();
495 let start2 = &word[0..2].iter().collect::<String>();
496
497 if start3 == "MAC" {
498 word[1] = 'C';
499 } else if start2 == "KN" {
500 word.remove(0);
501 }
502 } else if word.len() >= 2 {
503 let start = &word[0..2].iter().collect::<String>();
504 if start == "KN" {
505 word.remove(0);
506 }
507 }
508
509 if word.len() >= 2 && (word[0] == 'P' && (word[1] == 'H' || word[1] == 'F')) {
511 word[0] = 'F';
512 word[1] = 'F';
513 }
514
515 if word.len() >= 3 && word[0] == 'S' && word[1] == 'C' && word[2] == 'H' {
517 word[0] = 'S';
518 word[1] = 'S';
519 word.remove(2); }
521
522 let len = word.len();
524 if len >= 2 {
525 let end = &word[len - 2..].iter().collect::<String>();
526 match end.as_str() {
527 "EE" | "IE" => {
528 word.truncate(len - 2);
529 word.push('Y');
530 }
531 "DT" | "RT" | "RD" | "NT" | "ND" => {
532 word.truncate(len - 1);
533 word.push('D');
534 }
535 _ => {}
536 }
537 }
538
539 let mut result = vec![word[0]];
541
542 for i in 1..word.len() {
544 let ch = word[i];
545 let prev = word[i - 1];
546 let next = word.get(i + 1).copied();
547
548 match ch {
549 'A' => {
550 if prev != 'A' && result.last() != Some(&'A') {
552 result.push('A');
553 }
554 }
555 'E' | 'I' | 'O' | 'U' => {
556 if prev == ch {
557 continue; }
559 if result.last() != Some(&'A') {
561 result.push('A');
562 }
563 }
564 'Q' => result.push('G'),
565 'Z' => result.push('S'),
566 'M' => result.push('N'),
567 'K' => {
568 if next == Some('N') {
569 result.push('N');
570 } else {
571 result.push('C');
572 }
573 }
574 'S' => {
575 if next == Some('C') && word.get(i + 2) == Some(&'H') {
576 result.push('S');
577 result.push('S');
578 result.push('S');
579 } else {
580 result.push('S');
581 }
582 }
583 'P' => {
584 if next == Some('H') {
585 result.push('F');
586 result.push('F');
587 } else {
588 result.push('P');
589 }
590 }
591 'H' => {
592 if prev == 'G' {
594 } else if !matches!(prev, 'A' | 'E' | 'I' | 'O' | 'U')
596 && !matches!(
597 next,
598 Some('A') | Some('E') | Some('I') | Some('O') | Some('U')
599 )
600 && prev != ch
601 {
602 result.push('H');
603 }
604 }
605 'W' => {
606 if matches!(prev, 'A' | 'E' | 'I' | 'O' | 'U') && prev != ch {
607 result.push('W');
608 }
609 }
610 _ => {
611 if prev != ch {
612 result.push(ch);
614 } else if i == 1 && ch == 'F' && result.len() == 1 && result[0] == 'F' {
615 result.push(ch);
617 } else if i == 1 && ch == 'S' && result.len() == 1 && result[0] == 'S' {
618 result.push(ch);
620 }
621 }
622 }
623 }
624
625 while result.len() > 1
627 && (result.last() == Some(&'S')
628 || result.last() == Some(&'A')
629 || result.last() == Some(&'H'))
630 {
631 result.pop();
632 }
633
634 if result.len() >= 2 && result[result.len() - 2] == 'A' && result[result.len() - 1] == 'Y' {
636 result.pop();
637 result.pop();
638 result.push('Y');
639 }
640
641 let mut encoded: String = result.into_iter().collect();
643 if self.max_length > 0 && encoded.len() > self.max_length {
644 encoded.truncate(self.max_length);
645 }
646
647 Ok(encoded)
648 }
649}
650
651impl NeedlemanWunsch {
652 pub fn new() -> Self {
654 Self {
655 match_score: 1,
656 mismatch_penalty: -1,
657 gap_penalty: -1,
658 }
659 }
660
661 pub fn with_scores(_match_score: i32, mismatch_penalty: i32, gappenalty: i32) -> Self {
663 Self {
664 match_score: _match_score,
665 mismatch_penalty,
666 gap_penalty: gappenalty,
667 }
668 }
669
670 pub fn align(&self, seq1: &str, seq2: &str) -> AlignmentResult {
672 let seq1_chars: Vec<char> = seq1.chars().collect();
673 let seq2_chars: Vec<char> = seq2.chars().collect();
674 let m = seq1_chars.len();
675 let n = seq2_chars.len();
676
677 let mut matrix = vec![vec![0; n + 1]; m + 1];
679
680 for (i, item) in matrix.iter_mut().enumerate().take(m + 1) {
682 item[0] = i as i32 * self.gap_penalty;
683 }
684 for j in 0..=n {
685 matrix[0][j] = j as i32 * self.gap_penalty;
686 }
687
688 for i in 1..=m {
690 for j in 1..=n {
691 let match_mismatch = if seq1_chars[i - 1] == seq2_chars[j - 1] {
692 matrix[i - 1][j - 1] + self.match_score
693 } else {
694 matrix[i - 1][j - 1] + self.mismatch_penalty
695 };
696
697 let delete = matrix[i - 1][j] + self.gap_penalty;
698 let insert = matrix[i][j - 1] + self.gap_penalty;
699
700 matrix[i][j] = *[match_mismatch, delete, insert]
701 .iter()
702 .max()
703 .expect("Operation failed");
704 }
705 }
706
707 let mut aligned_seq1 = String::new();
709 let mut aligned_seq2 = String::new();
710 let mut i = m;
711 let mut j = n;
712
713 while i > 0 || j > 0 {
714 if i > 0 && j > 0 {
715 let current_score = matrix[i][j];
716 let diagonal_score = if seq1_chars[i - 1] == seq2_chars[j - 1] {
717 matrix[i - 1][j - 1] + self.match_score
718 } else {
719 matrix[i - 1][j - 1] + self.mismatch_penalty
720 };
721 let up_score = matrix[i - 1][j] + self.gap_penalty;
722 let left_score = matrix[i][j - 1] + self.gap_penalty;
723
724 if current_score == diagonal_score {
727 aligned_seq1.insert(0, seq1_chars[i - 1]);
728 aligned_seq2.insert(0, seq2_chars[j - 1]);
729 i -= 1;
730 j -= 1;
731 } else if current_score == left_score {
732 aligned_seq1.insert(0, '-');
733 aligned_seq2.insert(0, seq2_chars[j - 1]);
734 j -= 1;
735 } else if current_score == up_score {
736 aligned_seq1.insert(0, seq1_chars[i - 1]);
737 aligned_seq2.insert(0, '-');
738 i -= 1;
739 } else {
740 aligned_seq1.insert(0, seq1_chars[i - 1]);
742 aligned_seq2.insert(0, seq2_chars[j - 1]);
743 i -= 1;
744 j -= 1;
745 }
746 } else if i > 0 {
747 aligned_seq1.insert(0, seq1_chars[i - 1]);
748 aligned_seq2.insert(0, '-');
749 i -= 1;
750 } else {
751 aligned_seq1.insert(0, '-');
752 aligned_seq2.insert(0, seq2_chars[j - 1]);
753 j -= 1;
754 }
755 }
756
757 AlignmentResult {
758 aligned_seq1,
759 aligned_seq2,
760 score: matrix[m][n],
761 }
762 }
763}
764
765impl Default for NeedlemanWunsch {
766 fn default() -> Self {
767 Self::new()
768 }
769}
770
771impl SmithWaterman {
772 pub fn new() -> Self {
774 Self {
775 match_score: 2,
776 mismatch_penalty: -1,
777 gap_penalty: -1,
778 }
779 }
780
781 pub fn with_scores(_match_score: i32, mismatch_penalty: i32, gappenalty: i32) -> Self {
783 Self {
784 match_score: _match_score,
785 mismatch_penalty,
786 gap_penalty: gappenalty,
787 }
788 }
789
790 pub fn align(&self, seq1: &str, seq2: &str) -> AlignmentResult {
792 let seq1_chars: Vec<char> = seq1.chars().collect();
793 let seq2_chars: Vec<char> = seq2.chars().collect();
794 let m = seq1_chars.len();
795 let n = seq2_chars.len();
796
797 let mut matrix = vec![vec![0; n + 1]; m + 1];
799 let mut max_score = 0;
800 let mut max_i = 0;
801 let mut max_j = 0;
802
803 for i in 1..=m {
805 for j in 1..=n {
806 let match_mismatch = if seq1_chars[i - 1] == seq2_chars[j - 1] {
807 matrix[i - 1][j - 1] + self.match_score
808 } else {
809 matrix[i - 1][j - 1] + self.mismatch_penalty
810 };
811
812 let delete = matrix[i - 1][j] + self.gap_penalty;
813 let insert = matrix[i][j - 1] + self.gap_penalty;
814
815 matrix[i][j] = *[0, match_mismatch, delete, insert]
816 .iter()
817 .max()
818 .expect("Operation failed");
819
820 if matrix[i][j] > max_score {
822 max_score = matrix[i][j];
823 max_i = i;
824 max_j = j;
825 }
826 }
827 }
828
829 let mut aligned_seq1 = String::new();
831 let mut aligned_seq2 = String::new();
832 let mut i = max_i;
833 let mut j = max_j;
834
835 while i > 0 && j > 0 && matrix[i][j] > 0 {
836 let current_score = matrix[i][j];
837 let diagonal_score = if seq1_chars[i - 1] == seq2_chars[j - 1] {
838 matrix[i - 1][j - 1] + self.match_score
839 } else {
840 matrix[i - 1][j - 1] + self.mismatch_penalty
841 };
842
843 if current_score == diagonal_score {
844 aligned_seq1.insert(0, seq1_chars[i - 1]);
845 aligned_seq2.insert(0, seq2_chars[j - 1]);
846 i -= 1;
847 j -= 1;
848 } else if current_score == matrix[i - 1][j] + self.gap_penalty {
849 aligned_seq1.insert(0, seq1_chars[i - 1]);
850 aligned_seq2.insert(0, '-');
851 i -= 1;
852 } else if current_score == matrix[i][j - 1] + self.gap_penalty {
853 aligned_seq1.insert(0, '-');
854 aligned_seq2.insert(0, seq2_chars[j - 1]);
855 j -= 1;
856 } else {
857 break;
859 }
860 }
861
862 AlignmentResult {
863 aligned_seq1,
864 aligned_seq2,
865 score: max_score,
866 }
867 }
868}
869
870impl Default for SmithWaterman {
871 fn default() -> Self {
872 Self::new()
873 }
874}
875
876#[cfg(test)]
877mod tests {
878 use super::*;
879
880 #[test]
881 fn test_damerau_levenshtein() {
882 let metric = DamerauLevenshteinMetric::new();
883
884 assert_eq!(metric.distance("", "").expect("Operation failed"), 0.0);
886 assert_eq!(metric.distance("abc", "").expect("Operation failed"), 3.0);
887 assert_eq!(metric.distance("", "abc").expect("Operation failed"), 3.0);
888 assert_eq!(
889 metric.distance("abc", "abc").expect("Operation failed"),
890 0.0
891 );
892
893 assert_eq!(
895 metric.distance("abc", "aXc").expect("Operation failed"),
896 1.0
897 ); assert_eq!(metric.distance("abc", "ac").expect("Operation failed"), 1.0); assert_eq!(metric.distance("ac", "abc").expect("Operation failed"), 1.0); assert_eq!(
901 metric.distance("abc", "acb").expect("Operation failed"),
902 1.0
903 ); assert_eq!(
907 metric
908 .distance("kitten", "sitting")
909 .expect("Operation failed"),
910 3.0
911 );
912
913 assert!(
915 (metric
916 .normalized_distance("abc", "aXc")
917 .expect("Operation failed")
918 - 0.333)
919 .abs()
920 < 0.01
921 );
922 assert_eq!(
923 metric
924 .normalized_distance("", "")
925 .expect("Operation failed"),
926 0.0
927 );
928
929 assert!((metric.similarity("abc", "aXc").expect("Operation failed") - 0.667).abs() < 0.01);
931 }
932
933 #[test]
934 fn test_restricted_damerau_levenshtein() {
935 let metric = DamerauLevenshteinMetric::restricted();
936
937 assert_eq!(metric.distance("ca", "abc").expect("Operation failed"), 3.0);
939 }
941
942 #[test]
943 fn test_soundex() {
944 let soundex = Soundex::new();
945
946 assert_eq!(soundex.encode("Robert").expect("Operation failed"), "R163");
947 assert_eq!(soundex.encode("Rupert").expect("Operation failed"), "R163");
948 assert_eq!(soundex.encode("SOUNDEX").expect("Operation failed"), "S532");
949 assert_eq!(soundex.encode("Smith").expect("Operation failed"), "S530");
950 assert_eq!(soundex.encode("Smythe").expect("Operation failed"), "S530");
951 assert_eq!(soundex.encode("").expect("Operation failed"), "");
952 assert_eq!(soundex.encode("123").expect("Operation failed"), "");
953
954 assert!(soundex
956 .sounds_like("Robert", "Rupert")
957 .expect("Operation failed"));
958 assert!(soundex
959 .sounds_like("Smith", "Smythe")
960 .expect("Operation failed"));
961 assert!(!soundex
962 .sounds_like("Smith", "Jones")
963 .expect("Operation failed"));
964
965 let soundex_5 = Soundex::with_length(5);
967 assert_eq!(
968 soundex_5.encode("SOUNDEX").expect("Operation failed"),
969 "S5320"
970 );
971 }
972
973 #[test]
974 fn test_metaphone() {
975 let metaphone = Metaphone::new();
976
977 assert_eq!(
978 metaphone.encode("programming").expect("Operation failed"),
979 "PRKRMN"
980 );
981 assert_eq!(
982 metaphone.encode("programmer").expect("Operation failed"),
983 "PRKRMR"
984 );
985 assert_eq!(metaphone.encode("Wright").expect("Operation failed"), "RT");
986 assert_eq!(metaphone.encode("White").expect("Operation failed"), "WT");
987 assert_eq!(metaphone.encode("Knight").expect("Operation failed"), "NT");
988 assert_eq!(metaphone.encode("").expect("Operation failed"), "");
989 assert_eq!(metaphone.encode("123").expect("Operation failed"), "");
990
991 assert!(metaphone
993 .sounds_like("Wright", "Write")
994 .expect("Operation failed"));
995 assert!(!metaphone
996 .sounds_like("White", "Wright")
997 .expect("Operation failed"));
998
999 let metaphone_3 = Metaphone::with_max_length(3);
1001 assert_eq!(
1002 metaphone_3.encode("programming").expect("Operation failed"),
1003 "PRK"
1004 );
1005 }
1006
1007 #[test]
1008 fn test_phonetic_edge_cases() {
1009 let soundex = Soundex::new();
1010 let metaphone = Metaphone::new();
1011
1012 assert_eq!(soundex.encode("O'Brien").expect("Operation failed"), "O165");
1014 assert_eq!(
1015 metaphone.encode("O'Brien").expect("Operation failed"),
1016 "OBRN"
1017 );
1018
1019 assert_eq!(
1021 soundex.encode("McDonald").expect("Operation failed"),
1022 "M235"
1023 );
1024 assert_eq!(
1025 metaphone.encode("McDonald").expect("Operation failed"),
1026 "MKTNLT"
1027 );
1028 }
1029
1030 #[test]
1031 fn test_nysiis() {
1032 let nysiis = Nysiis::new();
1033
1034 assert_eq!(
1036 nysiis.encode("Johnson").expect("Operation failed"),
1037 "JANSAN"
1038 );
1039 assert_eq!(
1040 nysiis.encode("Williams").expect("Operation failed"),
1041 "WALAN"
1042 ); assert_eq!(nysiis.encode("Jones").expect("Operation failed"), "JAN");
1044 assert_eq!(nysiis.encode("Smith").expect("Operation failed"), "SNAT");
1045 assert_eq!(
1046 nysiis.encode("MacDonald").expect("Operation failed"),
1047 "MCDANALD"
1048 );
1049 assert_eq!(nysiis.encode("Knight").expect("Operation failed"), "NAGT");
1050 assert_eq!(nysiis.encode("").expect("Operation failed"), "");
1051 assert_eq!(nysiis.encode("123").expect("Operation failed"), "");
1052
1053 assert!(nysiis
1055 .sounds_like("Johnson", "Jonson")
1056 .expect("Operation failed"));
1057 assert!(!nysiis
1058 .sounds_like("Smith", "Jones")
1059 .expect("Operation failed"));
1060
1061 assert_eq!(
1063 nysiis.encode("Philips").expect("Operation failed"),
1064 "FFALAP"
1065 );
1066 assert_eq!(nysiis.encode("Schmidt").expect("Operation failed"), "SSNAD");
1067 assert_eq!(
1068 nysiis.encode("Schneider").expect("Operation failed"),
1069 "SSNADAR"
1070 );
1071
1072 let nysiis_6 = Nysiis::with_max_length(6);
1074 assert_eq!(
1075 nysiis_6.encode("Williams").expect("Operation failed"),
1076 "WALAN"
1077 ); assert_eq!(
1079 nysiis_6.encode("MacDonald").expect("Operation failed"),
1080 "MCDANA"
1081 ); }
1083
1084 #[test]
1085 fn test_needleman_wunsch() {
1086 let aligner = NeedlemanWunsch::new();
1087
1088 let result = aligner.align("GATTACA", "GCATGCU");
1090
1091 assert_eq!(result.aligned_seq1, "G-ATTACA");
1093 assert_eq!(result.score, 0);
1094
1095 let seq2_chars = result
1097 .aligned_seq2
1098 .chars()
1099 .filter(|&c| c != '-')
1100 .collect::<String>();
1101 assert_eq!(seq2_chars, "GCATGCU");
1102
1103 assert_eq!(result.aligned_seq1.len(), result.aligned_seq2.len());
1105
1106 let result = aligner.align("HELLO", "HELLO");
1108 assert_eq!(result.aligned_seq1, "HELLO");
1109 assert_eq!(result.aligned_seq2, "HELLO");
1110 assert_eq!(result.score, 5);
1111
1112 let result = aligner.align("", "ABC");
1114 assert_eq!(result.aligned_seq1, "---");
1115 assert_eq!(result.aligned_seq2, "ABC");
1116 assert_eq!(result.score, -3);
1117
1118 let result = aligner.align("ABC", "");
1119 assert_eq!(result.aligned_seq1, "ABC");
1120 assert_eq!(result.aligned_seq2, "---");
1121 assert_eq!(result.score, -3);
1122
1123 let custom_aligner = NeedlemanWunsch::with_scores(2, -2, -1);
1125 let result = custom_aligner.align("CAT", "CART");
1126 assert!(result.score > 0);
1127 }
1128
1129 #[test]
1130 fn test_smith_waterman() {
1131 let aligner = SmithWaterman::new();
1132
1133 let result = aligner.align("GGTTGACTA", "TGTTACGG");
1135 assert!(result.score > 0);
1136 assert!(result.aligned_seq1.contains("GTT"));
1137 assert!(result.aligned_seq2.contains("GTT"));
1138
1139 let result = aligner.align("ABCDEFG", "XYZCDEFPQ");
1141 assert_eq!(result.aligned_seq1, "CDEF");
1142 assert_eq!(result.aligned_seq2, "CDEF");
1143 assert_eq!(result.score, 8); let result = aligner.align("", "ABC");
1147 assert_eq!(result.aligned_seq1, "");
1148 assert_eq!(result.aligned_seq2, "");
1149 assert_eq!(result.score, 0);
1150
1151 let result = aligner.align("AAA", "BBB");
1153 assert_eq!(result.score, 0);
1154
1155 let custom_aligner = SmithWaterman::with_scores(3, -3, -2);
1157 let result = custom_aligner.align("ACACACTA", "AGCACACA");
1158 assert!(result.score > 0);
1159 }
1160}