1use crate::Result;
4use std::cmp::{max, min};
5use std::collections::HashMap;
6
7pub trait StringMetric {
9 fn distance(&self, s1: &str, s2: &str) -> Result<f64>;
11
12 fn normalized_distance(&self, s1: &str, s2: &str) -> Result<f64> {
14 let dist = self.distance(s1, s2)?;
15 let max_len = max(s1.len(), s2.len()) as f64;
16 Ok(if max_len > 0.0 { dist / max_len } else { 0.0 })
17 }
18
19 fn similarity(&self, s1: &str, s2: &str) -> Result<f64> {
21 Ok(1.0 - self.normalized_distance(s1, s2)?)
22 }
23}
24
25#[derive(Debug, Clone)]
27pub struct DamerauLevenshteinMetric {
28 restricted: bool,
30}
31
32impl DamerauLevenshteinMetric {
33 pub fn new() -> Self {
35 Self { restricted: false }
36 }
37
38 pub fn restricted() -> Self {
40 Self { restricted: true }
41 }
42}
43
44impl Default for DamerauLevenshteinMetric {
45 fn default() -> Self {
46 Self::new()
47 }
48}
49
50impl StringMetric for DamerauLevenshteinMetric {
51 fn distance(&self, s1: &str, s2: &str) -> Result<f64> {
52 if self.restricted {
53 Ok(osa_distance(s1, s2) as f64)
54 } else {
55 Ok(damerau_levenshtein_distance(s1, s2) as f64)
56 }
57 }
58}
59
60#[allow(dead_code)]
62fn osa_distance(s1: &str, s2: &str) -> usize {
63 let a: Vec<char> = s1.chars().collect();
64 let b: Vec<char> = s2.chars().collect();
65 let len_a = a.len();
66 let len_b = b.len();
67
68 if len_a == 0 {
69 return len_b;
70 }
71 if len_b == 0 {
72 return len_a;
73 }
74
75 let mut matrix = vec![vec![0; len_b + 1]; len_a + 1];
76
77 for (i, row) in matrix.iter_mut().enumerate().take(len_a + 1) {
78 row[0] = i;
79 }
80 for j in 0..=len_b {
82 matrix[0][j] = j;
83 }
84
85 for i in 1..=len_a {
86 for j in 1..=len_b {
87 let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 };
88
89 matrix[i][j] = min(
90 min(
91 matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, ),
94 matrix[i - 1][j - 1] + cost, );
96
97 if i > 1 && j > 1 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1] {
99 matrix[i][j] = min(matrix[i][j], matrix[i - 2][j - 2] + cost);
100 }
101 }
102 }
103
104 matrix[len_a][len_b]
105}
106
107#[allow(dead_code)]
109fn damerau_levenshtein_distance(s1: &str, s2: &str) -> usize {
110 let a: Vec<char> = s1.chars().collect();
111 let b: Vec<char> = s2.chars().collect();
112 let len_a = a.len();
113 let len_b = b.len();
114
115 if len_a == 0 {
116 return len_b;
117 }
118 if len_b == 0 {
119 return len_a;
120 }
121
122 let max_dist = len_a + len_b;
123 let mut h = vec![vec![max_dist; len_b + 2]; len_a + 2];
124 let mut da: HashMap<char, usize> = HashMap::new();
125
126 for i in 0..=len_a {
127 h[i + 1][0] = max_dist;
128 h[i + 1][1] = i;
129 }
130 for j in 0..=len_b {
131 h[0][j + 1] = max_dist;
132 h[1][j + 1] = j;
133 }
134
135 for i in 1..=len_a {
136 let mut db = 0;
137 for j in 1..=len_b {
138 let k = da.get(&b[j - 1]).copied().unwrap_or(0);
139 let l = db;
140 let cost = if a[i - 1] == b[j - 1] {
141 db = j;
142 0
143 } else {
144 1
145 };
146
147 h[i + 1][j + 1] = min(
148 min(
149 h[i][j] + cost, h[i + 1][j] + 1, ),
152 min(
153 h[i][j + 1] + 1, h[k][l] + (i - k - 1) + 1 + (j - l - 1), ),
156 );
157 }
158
159 da.insert(a[i - 1], i);
160 }
161
162 h[len_a + 1][len_b + 1]
163}
164
165pub trait PhoneticAlgorithm {
167 fn encode(&self, text: &str) -> Result<String>;
169
170 fn sounds_like(&self, s1: &str, s2: &str) -> Result<bool> {
172 Ok(self.encode(s1)? == self.encode(s2)?)
173 }
174}
175
176#[derive(Debug, Clone)]
178pub struct Soundex {
179 length: usize,
181}
182
183impl Soundex {
184 pub fn new() -> Self {
186 Self { length: 4 }
187 }
188
189 pub fn with_length(length: usize) -> Self {
191 Self { length }
192 }
193}
194
195impl Default for Soundex {
196 fn default() -> Self {
197 Self::new()
198 }
199}
200
201impl PhoneticAlgorithm for Soundex {
202 fn encode(&self, text: &str) -> Result<String> {
203 if text.is_empty() {
204 return Ok(String::new());
205 }
206
207 let text = text.to_uppercase();
208 let chars: Vec<char> = text.chars().filter(|c| c.is_alphabetic()).collect();
209
210 if chars.is_empty() {
211 return Ok(String::new());
212 }
213
214 let mut code = String::new();
215 code.push(chars[0]);
216
217 let mut last_code = encode_char(chars[0]);
218
219 for &ch in &chars[1..] {
220 let ch_code = encode_char(ch);
221 if ch_code != '0' && ch_code != last_code {
222 code.push(ch_code);
223 last_code = ch_code;
224 }
225
226 if code.len() >= self.length {
227 break;
228 }
229 }
230
231 while code.len() < self.length {
233 code.push('0');
234 }
235
236 Ok(code)
237 }
238}
239
240#[allow(dead_code)]
242fn encode_char(ch: char) -> char {
243 match ch.to_ascii_uppercase() {
244 'B' | 'F' | 'P' | 'V' => '1',
245 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' => '2',
246 'D' | 'T' => '3',
247 'L' => '4',
248 'M' | 'N' => '5',
249 'R' => '6',
250 _ => '0',
251 }
252}
253
254#[derive(Debug, Clone)]
256pub struct Metaphone {
257 max_length: usize,
259}
260
261#[derive(Debug, Clone)]
263pub struct Nysiis {
264 max_length: usize,
266}
267
268#[derive(Debug, Clone)]
270pub struct NeedlemanWunsch {
271 match_score: i32,
273 mismatch_penalty: i32,
275 gap_penalty: i32,
277}
278
279#[derive(Debug, Clone, PartialEq)]
281pub struct AlignmentResult {
282 pub aligned_seq1: String,
284 pub aligned_seq2: String,
286 pub score: i32,
288}
289
290#[derive(Debug, Clone)]
292pub struct SmithWaterman {
293 match_score: i32,
295 mismatch_penalty: i32,
297 gap_penalty: i32,
299}
300
301impl Metaphone {
302 pub fn new() -> Self {
304 Self { max_length: 6 }
305 }
306
307 pub fn with_max_length(_maxlength: usize) -> Self {
309 Self {
310 max_length: _maxlength,
311 }
312 }
313}
314
315impl Default for Metaphone {
316 fn default() -> Self {
317 Self::new()
318 }
319}
320
321impl PhoneticAlgorithm for Metaphone {
322 fn encode(&self, text: &str) -> Result<String> {
323 if text.is_empty() {
324 return Ok(String::new());
325 }
326
327 let text = text.to_uppercase();
328 let chars: Vec<char> = text.chars().filter(|c| c.is_alphabetic()).collect();
329
330 if chars.is_empty() {
331 return Ok(String::new());
332 }
333
334 let mut result = String::new();
335 let mut i = 0;
336
337 if chars.len() >= 2 {
339 match (chars[0], chars[1]) {
340 ('A', 'E') | ('G', 'N') | ('K', 'N') | ('P', 'N') | ('W', 'R') => i = 1,
341 ('W', 'H') => {
342 result.push('W');
343 i = 2;
344 }
345 _ => {}
346 }
347 }
348
349 while i < chars.len() && result.len() < self.max_length {
350 let ch = chars[i];
351 let next = chars.get(i + 1).copied();
352 let prev = if i > 0 {
353 chars.get(i - 1).copied()
354 } else {
355 None
356 };
357
358 match ch {
359 'A' | 'E' | 'I' | 'O' | 'U' => {
360 if i == 0 {
361 result.push(ch);
362 }
363 }
364 'B' => {
365 if !result.ends_with('B') {
366 result.push('B');
367 }
368 }
369 'C' => {
370 if next == Some('H') {
371 result.push('X');
372 i += 1;
373 } else if next == Some('I') || next == Some('E') || next == Some('Y') {
374 result.push('S');
375 } else {
376 result.push('K');
377 }
378 }
379 'D' => {
380 if next == Some('G') && chars.get(i + 2) == Some(&'E') {
381 result.push('J');
382 i += 2;
383 } else {
384 result.push('T');
385 }
386 }
387 'F' | 'J' | 'L' | 'M' | 'N' | 'R' => {
388 if !result.ends_with(ch) {
389 result.push(ch);
390 }
391 }
392 'G' => {
393 if next == Some('H') {
394 i += 1;
395 } else if next == Some('N') {
396 result.push('N');
397 i += 1;
398 } else {
399 result.push('K');
400 }
401 }
402 'H' => {
403 if prev != Some('C')
404 && prev != Some('S')
405 && prev != Some('P')
406 && prev != Some('T')
407 && prev != Some('G')
408 {
409 result.push('H');
410 }
411 }
412 'K' => {
413 if prev != Some('C') {
414 result.push('K');
415 }
416 }
417 'P' => {
418 if next == Some('H') {
419 result.push('F');
420 i += 1;
421 } else {
422 result.push('P');
423 }
424 }
425 'Q' => result.push('K'),
426 'S' => {
427 if next == Some('H') {
428 result.push('X');
429 i += 1;
430 } else if next == Some('I')
431 && (chars.get(i + 2) == Some(&'A') || chars.get(i + 2) == Some(&'O'))
432 {
433 result.push('X');
434 } else {
435 result.push('S');
436 }
437 }
438 'T' => {
439 if next == Some('H') {
440 result.push('0');
441 i += 1;
442 } else if next != Some('C') || chars.get(i + 2) != Some(&'H') {
443 result.push('T');
444 }
445 }
446 'V' => result.push('F'),
447 'W' | 'Y' => {
448 if next.map(|c| "AEIOU".contains(c)).unwrap_or(false) {
449 result.push(ch);
450 }
451 }
452 'X' => {
453 result.push('K');
454 result.push('S');
455 }
456 'Z' => result.push('S'),
457 _ => {}
458 }
459
460 i += 1;
461 }
462
463 Ok(result)
464 }
465}
466
467impl Nysiis {
468 pub fn new() -> Self {
470 Self { max_length: 0 }
471 }
472
473 pub fn with_max_length(_maxlength: usize) -> Self {
475 Self {
476 max_length: _maxlength,
477 }
478 }
479}
480
481impl Default for Nysiis {
482 fn default() -> Self {
483 Self::new()
484 }
485}
486
487impl PhoneticAlgorithm for Nysiis {
488 fn encode(&self, text: &str) -> Result<String> {
489 if text.is_empty() {
490 return Ok(String::new());
491 }
492
493 let mut word: Vec<char> = text
495 .to_uppercase()
496 .chars()
497 .filter(|c| c.is_alphabetic())
498 .collect();
499
500 if word.is_empty() {
501 return Ok(String::new());
502 }
503
504 if word.len() >= 3 {
506 let start3 = &word[0..3].iter().collect::<String>();
507 let start2 = &word[0..2].iter().collect::<String>();
508
509 if start3 == "MAC" {
510 word[1] = 'C';
511 } else if start2 == "KN" {
512 word.remove(0);
513 }
514 } else if word.len() >= 2 {
515 let start = &word[0..2].iter().collect::<String>();
516 if start == "KN" {
517 word.remove(0);
518 }
519 }
520
521 if word.len() >= 2 && (word[0] == 'P' && (word[1] == 'H' || word[1] == 'F')) {
523 word[0] = 'F';
524 word[1] = 'F';
525 }
526
527 if word.len() >= 3 && word[0] == 'S' && word[1] == 'C' && word[2] == 'H' {
529 word[0] = 'S';
530 word[1] = 'S';
531 word.remove(2); }
533
534 let len = word.len();
536 if len >= 2 {
537 let end = &word[len - 2..].iter().collect::<String>();
538 match end.as_str() {
539 "EE" | "IE" => {
540 word.truncate(len - 2);
541 word.push('Y');
542 }
543 "DT" | "RT" | "RD" | "NT" | "ND" => {
544 word.truncate(len - 1);
545 word.push('D');
546 }
547 _ => {}
548 }
549 }
550
551 let mut result = vec![word[0]];
553
554 for i in 1..word.len() {
556 let ch = word[i];
557 let prev = word[i - 1];
558 let next = word.get(i + 1).copied();
559
560 match ch {
561 'A' => {
562 if prev != 'A' && result.last() != Some(&'A') {
564 result.push('A');
565 }
566 }
567 'E' | 'I' | 'O' | 'U' => {
568 if prev == ch {
569 continue; }
571 if result.last() != Some(&'A') {
573 result.push('A');
574 }
575 }
576 'Q' => result.push('G'),
577 'Z' => result.push('S'),
578 'M' => result.push('N'),
579 'K' => {
580 if next == Some('N') {
581 result.push('N');
582 } else {
583 result.push('C');
584 }
585 }
586 'S' => {
587 if next == Some('C') && word.get(i + 2) == Some(&'H') {
588 result.push('S');
589 result.push('S');
590 result.push('S');
591 } else {
592 result.push('S');
593 }
594 }
595 'P' => {
596 if next == Some('H') {
597 result.push('F');
598 result.push('F');
599 } else {
600 result.push('P');
601 }
602 }
603 'H' => {
604 if prev == 'G' {
606 } else if !matches!(prev, 'A' | 'E' | 'I' | 'O' | 'U')
608 && !matches!(
609 next,
610 Some('A') | Some('E') | Some('I') | Some('O') | Some('U')
611 )
612 && prev != ch
613 {
614 result.push('H');
615 }
616 }
617 'W' => {
618 if matches!(prev, 'A' | 'E' | 'I' | 'O' | 'U') && prev != ch {
619 result.push('W');
620 }
621 }
622 _ => {
623 if prev != ch {
624 result.push(ch);
626 } else if i == 1 && ch == 'F' && result.len() == 1 && result[0] == 'F' {
627 result.push(ch);
629 } else if i == 1 && ch == 'S' && result.len() == 1 && result[0] == 'S' {
630 result.push(ch);
632 }
633 }
634 }
635 }
636
637 while result.len() > 1
639 && (result.last() == Some(&'S')
640 || result.last() == Some(&'A')
641 || result.last() == Some(&'H'))
642 {
643 result.pop();
644 }
645
646 if result.len() >= 2 && result[result.len() - 2] == 'A' && result[result.len() - 1] == 'Y' {
648 result.pop();
649 result.pop();
650 result.push('Y');
651 }
652
653 let mut encoded: String = result.into_iter().collect();
655 if self.max_length > 0 && encoded.len() > self.max_length {
656 encoded.truncate(self.max_length);
657 }
658
659 Ok(encoded)
660 }
661}
662
663impl NeedlemanWunsch {
664 pub fn new() -> Self {
666 Self {
667 match_score: 1,
668 mismatch_penalty: -1,
669 gap_penalty: -1,
670 }
671 }
672
673 pub fn with_scores(_match_score: i32, mismatch_penalty: i32, gappenalty: i32) -> Self {
675 Self {
676 match_score: _match_score,
677 mismatch_penalty,
678 gap_penalty: gappenalty,
679 }
680 }
681
682 pub fn align(&self, seq1: &str, seq2: &str) -> AlignmentResult {
684 let seq1_chars: Vec<char> = seq1.chars().collect();
685 let seq2_chars: Vec<char> = seq2.chars().collect();
686 let m = seq1_chars.len();
687 let n = seq2_chars.len();
688
689 let mut matrix = vec![vec![0; n + 1]; m + 1];
691
692 for (i, item) in matrix.iter_mut().enumerate().take(m + 1) {
694 item[0] = i as i32 * self.gap_penalty;
695 }
696 for j in 0..=n {
697 matrix[0][j] = j as i32 * self.gap_penalty;
698 }
699
700 for i in 1..=m {
702 for j in 1..=n {
703 let match_mismatch = if seq1_chars[i - 1] == seq2_chars[j - 1] {
704 matrix[i - 1][j - 1] + self.match_score
705 } else {
706 matrix[i - 1][j - 1] + self.mismatch_penalty
707 };
708
709 let delete = matrix[i - 1][j] + self.gap_penalty;
710 let insert = matrix[i][j - 1] + self.gap_penalty;
711
712 matrix[i][j] = *[match_mismatch, delete, insert].iter().max().unwrap();
713 }
714 }
715
716 let mut aligned_seq1 = String::new();
718 let mut aligned_seq2 = String::new();
719 let mut i = m;
720 let mut j = n;
721
722 while i > 0 || j > 0 {
723 if i > 0 && j > 0 {
724 let current_score = matrix[i][j];
725 let diagonal_score = if seq1_chars[i - 1] == seq2_chars[j - 1] {
726 matrix[i - 1][j - 1] + self.match_score
727 } else {
728 matrix[i - 1][j - 1] + self.mismatch_penalty
729 };
730 let up_score = matrix[i - 1][j] + self.gap_penalty;
731 let left_score = matrix[i][j - 1] + self.gap_penalty;
732
733 if current_score == diagonal_score {
736 aligned_seq1.insert(0, seq1_chars[i - 1]);
737 aligned_seq2.insert(0, seq2_chars[j - 1]);
738 i -= 1;
739 j -= 1;
740 } else if current_score == left_score {
741 aligned_seq1.insert(0, '-');
742 aligned_seq2.insert(0, seq2_chars[j - 1]);
743 j -= 1;
744 } else if current_score == up_score {
745 aligned_seq1.insert(0, seq1_chars[i - 1]);
746 aligned_seq2.insert(0, '-');
747 i -= 1;
748 } else {
749 aligned_seq1.insert(0, seq1_chars[i - 1]);
751 aligned_seq2.insert(0, seq2_chars[j - 1]);
752 i -= 1;
753 j -= 1;
754 }
755 } else if i > 0 {
756 aligned_seq1.insert(0, seq1_chars[i - 1]);
757 aligned_seq2.insert(0, '-');
758 i -= 1;
759 } else {
760 aligned_seq1.insert(0, '-');
761 aligned_seq2.insert(0, seq2_chars[j - 1]);
762 j -= 1;
763 }
764 }
765
766 AlignmentResult {
767 aligned_seq1,
768 aligned_seq2,
769 score: matrix[m][n],
770 }
771 }
772}
773
774impl Default for NeedlemanWunsch {
775 fn default() -> Self {
776 Self::new()
777 }
778}
779
780impl SmithWaterman {
781 pub fn new() -> Self {
783 Self {
784 match_score: 2,
785 mismatch_penalty: -1,
786 gap_penalty: -1,
787 }
788 }
789
790 pub fn with_scores(_match_score: i32, mismatch_penalty: i32, gappenalty: i32) -> Self {
792 Self {
793 match_score: _match_score,
794 mismatch_penalty,
795 gap_penalty: gappenalty,
796 }
797 }
798
799 pub fn align(&self, seq1: &str, seq2: &str) -> AlignmentResult {
801 let seq1_chars: Vec<char> = seq1.chars().collect();
802 let seq2_chars: Vec<char> = seq2.chars().collect();
803 let m = seq1_chars.len();
804 let n = seq2_chars.len();
805
806 let mut matrix = vec![vec![0; n + 1]; m + 1];
808 let mut max_score = 0;
809 let mut max_i = 0;
810 let mut max_j = 0;
811
812 for i in 1..=m {
814 for j in 1..=n {
815 let match_mismatch = if seq1_chars[i - 1] == seq2_chars[j - 1] {
816 matrix[i - 1][j - 1] + self.match_score
817 } else {
818 matrix[i - 1][j - 1] + self.mismatch_penalty
819 };
820
821 let delete = matrix[i - 1][j] + self.gap_penalty;
822 let insert = matrix[i][j - 1] + self.gap_penalty;
823
824 matrix[i][j] = *[0, match_mismatch, delete, insert].iter().max().unwrap();
825
826 if matrix[i][j] > max_score {
828 max_score = matrix[i][j];
829 max_i = i;
830 max_j = j;
831 }
832 }
833 }
834
835 let mut aligned_seq1 = String::new();
837 let mut aligned_seq2 = String::new();
838 let mut i = max_i;
839 let mut j = max_j;
840
841 while i > 0 && j > 0 && matrix[i][j] > 0 {
842 let current_score = matrix[i][j];
843 let diagonal_score = if seq1_chars[i - 1] == seq2_chars[j - 1] {
844 matrix[i - 1][j - 1] + self.match_score
845 } else {
846 matrix[i - 1][j - 1] + self.mismatch_penalty
847 };
848
849 if current_score == diagonal_score {
850 aligned_seq1.insert(0, seq1_chars[i - 1]);
851 aligned_seq2.insert(0, seq2_chars[j - 1]);
852 i -= 1;
853 j -= 1;
854 } else if current_score == matrix[i - 1][j] + self.gap_penalty {
855 aligned_seq1.insert(0, seq1_chars[i - 1]);
856 aligned_seq2.insert(0, '-');
857 i -= 1;
858 } else if current_score == matrix[i][j - 1] + self.gap_penalty {
859 aligned_seq1.insert(0, '-');
860 aligned_seq2.insert(0, seq2_chars[j - 1]);
861 j -= 1;
862 } else {
863 break;
865 }
866 }
867
868 AlignmentResult {
869 aligned_seq1,
870 aligned_seq2,
871 score: max_score,
872 }
873 }
874}
875
876impl Default for SmithWaterman {
877 fn default() -> Self {
878 Self::new()
879 }
880}
881
882#[cfg(test)]
883mod tests {
884 use super::*;
885
886 #[test]
887 fn test_damerau_levenshtein() {
888 let metric = DamerauLevenshteinMetric::new();
889
890 assert_eq!(metric.distance("", "").unwrap(), 0.0);
892 assert_eq!(metric.distance("abc", "").unwrap(), 3.0);
893 assert_eq!(metric.distance("", "abc").unwrap(), 3.0);
894 assert_eq!(metric.distance("abc", "abc").unwrap(), 0.0);
895
896 assert_eq!(metric.distance("abc", "aXc").unwrap(), 1.0); assert_eq!(metric.distance("abc", "ac").unwrap(), 1.0); assert_eq!(metric.distance("ac", "abc").unwrap(), 1.0); assert_eq!(metric.distance("abc", "acb").unwrap(), 1.0); assert_eq!(metric.distance("kitten", "sitting").unwrap(), 3.0);
904
905 assert!((metric.normalized_distance("abc", "aXc").unwrap() - 0.333).abs() < 0.01);
907 assert_eq!(metric.normalized_distance("", "").unwrap(), 0.0);
908
909 assert!((metric.similarity("abc", "aXc").unwrap() - 0.667).abs() < 0.01);
911 }
912
913 #[test]
914 fn test_restricted_damerau_levenshtein() {
915 let metric = DamerauLevenshteinMetric::restricted();
916
917 assert_eq!(metric.distance("ca", "abc").unwrap(), 3.0); }
920
921 #[test]
922 fn test_soundex() {
923 let soundex = Soundex::new();
924
925 assert_eq!(soundex.encode("Robert").unwrap(), "R163");
926 assert_eq!(soundex.encode("Rupert").unwrap(), "R163");
927 assert_eq!(soundex.encode("SOUNDEX").unwrap(), "S532");
928 assert_eq!(soundex.encode("Smith").unwrap(), "S530");
929 assert_eq!(soundex.encode("Smythe").unwrap(), "S530");
930 assert_eq!(soundex.encode("").unwrap(), "");
931 assert_eq!(soundex.encode("123").unwrap(), "");
932
933 assert!(soundex.sounds_like("Robert", "Rupert").unwrap());
935 assert!(soundex.sounds_like("Smith", "Smythe").unwrap());
936 assert!(!soundex.sounds_like("Smith", "Jones").unwrap());
937
938 let soundex_5 = Soundex::with_length(5);
940 assert_eq!(soundex_5.encode("SOUNDEX").unwrap(), "S5320");
941 }
942
943 #[test]
944 fn test_metaphone() {
945 let metaphone = Metaphone::new();
946
947 assert_eq!(metaphone.encode("programming").unwrap(), "PRKRMN");
948 assert_eq!(metaphone.encode("programmer").unwrap(), "PRKRMR");
949 assert_eq!(metaphone.encode("Wright").unwrap(), "RT");
950 assert_eq!(metaphone.encode("White").unwrap(), "WT");
951 assert_eq!(metaphone.encode("Knight").unwrap(), "NT");
952 assert_eq!(metaphone.encode("").unwrap(), "");
953 assert_eq!(metaphone.encode("123").unwrap(), "");
954
955 assert!(metaphone.sounds_like("Wright", "Write").unwrap());
957 assert!(!metaphone.sounds_like("White", "Wright").unwrap());
958
959 let metaphone_3 = Metaphone::with_max_length(3);
961 assert_eq!(metaphone_3.encode("programming").unwrap(), "PRK");
962 }
963
964 #[test]
965 fn test_phonetic_edge_cases() {
966 let soundex = Soundex::new();
967 let metaphone = Metaphone::new();
968
969 assert_eq!(soundex.encode("O'Brien").unwrap(), "O165");
971 assert_eq!(metaphone.encode("O'Brien").unwrap(), "OBRN");
972
973 assert_eq!(soundex.encode("McDonald").unwrap(), "M235");
975 assert_eq!(metaphone.encode("McDonald").unwrap(), "MKTNLT");
976 }
977
978 #[test]
979 fn test_nysiis() {
980 let nysiis = Nysiis::new();
981
982 assert_eq!(nysiis.encode("Johnson").unwrap(), "JANSAN");
984 assert_eq!(nysiis.encode("Williams").unwrap(), "WALAN"); assert_eq!(nysiis.encode("Jones").unwrap(), "JAN");
986 assert_eq!(nysiis.encode("Smith").unwrap(), "SNAT");
987 assert_eq!(nysiis.encode("MacDonald").unwrap(), "MCDANALD");
988 assert_eq!(nysiis.encode("Knight").unwrap(), "NAGT");
989 assert_eq!(nysiis.encode("").unwrap(), "");
990 assert_eq!(nysiis.encode("123").unwrap(), "");
991
992 assert!(nysiis.sounds_like("Johnson", "Jonson").unwrap());
994 assert!(!nysiis.sounds_like("Smith", "Jones").unwrap());
995
996 assert_eq!(nysiis.encode("Philips").unwrap(), "FFALAP");
998 assert_eq!(nysiis.encode("Schmidt").unwrap(), "SSNAD");
999 assert_eq!(nysiis.encode("Schneider").unwrap(), "SSNADAR");
1000
1001 let nysiis_6 = Nysiis::with_max_length(6);
1003 assert_eq!(nysiis_6.encode("Williams").unwrap(), "WALAN"); assert_eq!(nysiis_6.encode("MacDonald").unwrap(), "MCDANA"); }
1006
1007 #[test]
1008 fn test_needleman_wunsch() {
1009 let aligner = NeedlemanWunsch::new();
1010
1011 let result = aligner.align("GATTACA", "GCATGCU");
1013
1014 assert_eq!(result.aligned_seq1, "G-ATTACA");
1016 assert_eq!(result.score, 0);
1017
1018 let seq2_chars = result
1020 .aligned_seq2
1021 .chars()
1022 .filter(|&c| c != '-')
1023 .collect::<String>();
1024 assert_eq!(seq2_chars, "GCATGCU");
1025
1026 assert_eq!(result.aligned_seq1.len(), result.aligned_seq2.len());
1028
1029 let result = aligner.align("HELLO", "HELLO");
1031 assert_eq!(result.aligned_seq1, "HELLO");
1032 assert_eq!(result.aligned_seq2, "HELLO");
1033 assert_eq!(result.score, 5);
1034
1035 let result = aligner.align("", "ABC");
1037 assert_eq!(result.aligned_seq1, "---");
1038 assert_eq!(result.aligned_seq2, "ABC");
1039 assert_eq!(result.score, -3);
1040
1041 let result = aligner.align("ABC", "");
1042 assert_eq!(result.aligned_seq1, "ABC");
1043 assert_eq!(result.aligned_seq2, "---");
1044 assert_eq!(result.score, -3);
1045
1046 let custom_aligner = NeedlemanWunsch::with_scores(2, -2, -1);
1048 let result = custom_aligner.align("CAT", "CART");
1049 assert!(result.score > 0);
1050 }
1051
1052 #[test]
1053 fn test_smith_waterman() {
1054 let aligner = SmithWaterman::new();
1055
1056 let result = aligner.align("GGTTGACTA", "TGTTACGG");
1058 assert!(result.score > 0);
1059 assert!(result.aligned_seq1.contains("GTT"));
1060 assert!(result.aligned_seq2.contains("GTT"));
1061
1062 let result = aligner.align("ABCDEFG", "XYZCDEFPQ");
1064 assert_eq!(result.aligned_seq1, "CDEF");
1065 assert_eq!(result.aligned_seq2, "CDEF");
1066 assert_eq!(result.score, 8); let result = aligner.align("", "ABC");
1070 assert_eq!(result.aligned_seq1, "");
1071 assert_eq!(result.aligned_seq2, "");
1072 assert_eq!(result.score, 0);
1073
1074 let result = aligner.align("AAA", "BBB");
1076 assert_eq!(result.score, 0);
1077
1078 let custom_aligner = SmithWaterman::with_scores(3, -3, -2);
1080 let result = custom_aligner.align("ACACACTA", "AGCACACA");
1081 assert!(result.score > 0);
1082 }
1083}