1use std::collections::HashSet;
2
3use crate::core::contig::Contig;
4use crate::core::header::QueryHeader;
5use crate::core::reference::KnownReference;
6use crate::core::types::Confidence;
7
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10pub enum ContigMatchType {
11 Exact,
13 NameLengthNoMd5,
15 Md5Conflict,
17 Unmatched,
19}
20
21#[inline]
27fn count_to_f64(count: usize) -> f64 {
28 #[allow(clippy::cast_precision_loss)]
29 {
30 count as f64
31 }
32}
33
34#[derive(Debug, Clone)]
36pub struct MatchScore {
37 pub composite: f64,
39
40 pub confidence: Confidence,
42
43 pub exact_matches: usize,
46
47 pub name_length_matches: usize,
49
50 pub md5_conflicts: usize,
52
53 pub unmatched: usize,
55
56 pub match_quality: f64,
59
60 pub coverage_score: f64,
62
63 pub order_score: f64,
65
66 pub order_preserved: bool,
68
69 pub md5_jaccard: f64,
72
73 pub name_length_jaccard: f64,
75
76 pub md5_query_coverage: f64,
78
79 pub name_length_query_coverage: f64,
81}
82
83impl MatchScore {
84 #[must_use]
94 pub fn calculate(query: &QueryHeader, reference: &KnownReference) -> Self {
95 let mut exact_matches = 0usize;
97 let mut name_length_matches = 0usize; let mut md5_conflicts = 0usize;
99 let mut unmatched = 0usize;
100
101 for contig in &query.contigs {
102 match classify_contig_match(contig, reference) {
103 ContigMatchType::Exact => exact_matches += 1,
104 ContigMatchType::NameLengthNoMd5 => name_length_matches += 1,
105 ContigMatchType::Md5Conflict => md5_conflicts += 1,
106 ContigMatchType::Unmatched => unmatched += 1,
107 }
108 }
109
110 let total_query = query.contigs.len();
111
112 let weighted_matches = count_to_f64(exact_matches)
115 + count_to_f64(name_length_matches)
116 + (count_to_f64(md5_conflicts) * 0.1);
117
118 let match_quality = if total_query > 0 {
119 weighted_matches / count_to_f64(total_query)
120 } else {
121 0.0
122 };
123
124 let good_matches = exact_matches + name_length_matches;
126 let coverage_score = if reference.contigs.is_empty() {
127 0.0
128 } else {
129 (count_to_f64(good_matches) / count_to_f64(reference.contigs.len())).min(1.0)
130 };
131
132 let (order_preserved, order_score) = analyze_order(query, reference);
134
135 let composite = ((0.70 * match_quality) + (0.20 * coverage_score) + (0.10 * order_score))
138 .clamp(0.0, 1.0);
139
140 let confidence = Confidence::from_score(composite);
141
142 let md5_jaccard = jaccard_similarity(&query.md5_set, &reference.md5_set);
144 let (name_length_jaccard, name_length_query_coverage) =
145 calculate_name_length_similarity_with_aliases(query, reference);
146 let md5_query_coverage = if query.md5_set.is_empty() {
147 0.0
148 } else {
149 count_to_f64(query.md5_set.intersection(&reference.md5_set).count())
150 / count_to_f64(query.md5_set.len())
151 };
152
153 Self {
154 composite,
155 confidence,
156 exact_matches,
157 name_length_matches,
158 md5_conflicts,
159 unmatched,
160 match_quality,
161 coverage_score,
162 order_score,
163 order_preserved,
164 md5_jaccard,
165 name_length_jaccard,
166 md5_query_coverage,
167 name_length_query_coverage,
168 }
169 }
170
171 #[must_use]
179 pub fn calculate_with_weights(
180 query: &QueryHeader,
181 reference: &KnownReference,
182 weights: &crate::matching::engine::ScoringWeights,
183 ) -> Self {
184 let mut exact_matches = 0usize;
186 let mut name_length_matches = 0usize;
187 let mut md5_conflicts = 0usize;
188 let mut unmatched = 0usize;
189
190 for contig in &query.contigs {
191 match classify_contig_match(contig, reference) {
192 ContigMatchType::Exact => exact_matches += 1,
193 ContigMatchType::NameLengthNoMd5 => name_length_matches += 1,
194 ContigMatchType::Md5Conflict => md5_conflicts += 1,
195 ContigMatchType::Unmatched => unmatched += 1,
196 }
197 }
198
199 let total_query = query.contigs.len();
200
201 let normalized_weights = weights.normalized();
203
204 let weighted_matches = count_to_f64(exact_matches)
206 + count_to_f64(name_length_matches)
207 + (count_to_f64(md5_conflicts) * normalized_weights.conflict_penalty);
208
209 let match_quality = if total_query > 0 {
210 weighted_matches / count_to_f64(total_query)
211 } else {
212 0.0
213 };
214
215 let good_matches = exact_matches + name_length_matches;
217 let coverage_score = if reference.contigs.is_empty() {
218 0.0
219 } else {
220 (count_to_f64(good_matches) / count_to_f64(reference.contigs.len())).min(1.0)
221 };
222
223 let (order_preserved, order_score) = analyze_order(query, reference);
225
226 let composite = ((normalized_weights.contig_match * match_quality)
229 + (normalized_weights.coverage * coverage_score)
230 + (normalized_weights.order * order_score))
231 .clamp(0.0, 1.0);
232
233 let confidence = Confidence::from_score(composite);
234
235 let md5_jaccard = jaccard_similarity(&query.md5_set, &reference.md5_set);
237 let (name_length_jaccard, name_length_query_coverage) =
238 calculate_name_length_similarity_with_aliases(query, reference);
239 let md5_query_coverage = if query.md5_set.is_empty() {
240 0.0
241 } else {
242 count_to_f64(query.md5_set.intersection(&reference.md5_set).count())
243 / count_to_f64(query.md5_set.len())
244 };
245
246 Self {
247 composite,
248 confidence,
249 exact_matches,
250 name_length_matches,
251 md5_conflicts,
252 unmatched,
253 match_quality,
254 coverage_score,
255 order_score,
256 order_preserved,
257 md5_jaccard,
258 name_length_jaccard,
259 md5_query_coverage,
260 name_length_query_coverage,
261 }
262 }
263}
264
265fn jaccard_similarity<T: Eq + std::hash::Hash>(a: &HashSet<T>, b: &HashSet<T>) -> f64 {
270 let intersection = a.intersection(b).count();
271 let union = a.union(b).count();
272 if union == 0 {
273 0.0
275 } else {
276 count_to_f64(intersection) / count_to_f64(union)
277 }
278}
279
280fn calculate_name_length_similarity_with_aliases(
288 query: &QueryHeader,
289 reference: &KnownReference,
290) -> (f64, f64) {
291 let mut matched_query_contigs = 0usize;
293 let mut matched_ref_keys: HashSet<(String, u64)> = HashSet::new();
294
295 for contig in &query.contigs {
296 let name_key = (contig.name.clone(), contig.length);
297
298 if reference.name_length_set.contains(&name_key) {
300 matched_query_contigs += 1;
301 matched_ref_keys.insert(name_key);
302 continue;
303 }
304
305 for alias in &contig.aliases {
307 let alias_key = (alias.clone(), contig.length);
308 if reference.name_length_set.contains(&alias_key) {
309 matched_query_contigs += 1;
310 matched_ref_keys.insert(alias_key);
311 break;
312 }
313 }
314 }
315
316 let query_count = query.contigs.len();
319 let ref_count = reference.contigs.len();
320 let union_size = query_count + ref_count - matched_ref_keys.len();
321
322 let jaccard = if union_size == 0 {
323 0.0
324 } else {
325 count_to_f64(matched_ref_keys.len()) / count_to_f64(union_size)
326 };
327
328 let coverage = if query_count == 0 {
329 0.0
330 } else {
331 count_to_f64(matched_query_contigs) / count_to_f64(query_count)
332 };
333
334 (jaccard, coverage)
335}
336
337fn analyze_order(query: &QueryHeader, reference: &KnownReference) -> (bool, f64) {
347 use std::collections::HashMap;
348
349 let mut ref_positions: HashMap<(&str, u64), usize> = HashMap::new();
352 for (i, contig) in reference.contigs.iter().enumerate() {
353 ref_positions.insert((contig.name.as_str(), contig.length), i);
355 for alias in &contig.aliases {
357 ref_positions.insert((alias.as_str(), contig.length), i);
358 }
359 }
360
361 let mut positions: Vec<usize> = Vec::new();
364 for contig in &query.contigs {
365 let key = (contig.name.as_str(), contig.length);
367 if let Some(&pos) = ref_positions.get(&key) {
368 positions.push(pos);
369 continue;
370 }
371
372 for alias in &contig.aliases {
374 let alias_key = (alias.as_str(), contig.length);
375 if let Some(&pos) = ref_positions.get(&alias_key) {
376 positions.push(pos);
377 break;
378 }
379 }
380 }
381
382 if positions.len() < 2 {
384 return (true, 0.0);
387 }
388
389 let is_sorted = positions.windows(2).all(|w| w[0] < w[1]);
391
392 let lis_len = longest_increasing_subsequence(&positions);
394 let order_score = count_to_f64(lis_len) / count_to_f64(positions.len());
395
396 (is_sorted, order_score)
397}
398
399fn longest_increasing_subsequence(arr: &[usize]) -> usize {
403 if arr.is_empty() {
404 return 0;
405 }
406
407 let mut dp = vec![1usize; arr.len()];
409
410 for i in 1..arr.len() {
411 for j in 0..i {
412 if arr[j] < arr[i] {
413 dp[i] = dp[i].max(dp[j] + 1);
414 }
415 }
416 }
417
418 dp.into_iter().max().expect("dp is non-empty")
420}
421
422fn classify_contig_match(query_contig: &Contig, reference: &KnownReference) -> ContigMatchType {
430 let matched_ref_contig = find_matching_reference_contig(query_contig, reference);
432
433 match matched_ref_contig {
434 None => ContigMatchType::Unmatched,
435 Some(ref_contig) => {
436 match (&query_contig.md5, &ref_contig.md5) {
438 (Some(q_md5), Some(r_md5)) => {
440 if q_md5.eq_ignore_ascii_case(r_md5) {
441 ContigMatchType::Exact
442 } else {
443 ContigMatchType::Md5Conflict
445 }
446 }
447 _ => ContigMatchType::NameLengthNoMd5,
449 }
450 }
451 }
452}
453
454fn find_matching_reference_contig<'a>(
457 query_contig: &Contig,
458 reference: &'a KnownReference,
459) -> Option<&'a Contig> {
460 let query_key = (query_contig.name.clone(), query_contig.length);
462 if reference.name_length_set.contains(&query_key) {
463 for ref_contig in &reference.contigs {
465 if ref_contig.name == query_contig.name && ref_contig.length == query_contig.length {
466 return Some(ref_contig);
467 }
468 for alias in &ref_contig.aliases {
470 if alias == &query_contig.name && ref_contig.length == query_contig.length {
471 return Some(ref_contig);
472 }
473 }
474 }
475 }
476
477 for query_alias in &query_contig.aliases {
479 let alias_key = (query_alias.clone(), query_contig.length);
480 if reference.name_length_set.contains(&alias_key) {
481 for ref_contig in &reference.contigs {
483 if ref_contig.name == *query_alias && ref_contig.length == query_contig.length {
484 return Some(ref_contig);
485 }
486 for ref_alias in &ref_contig.aliases {
487 if ref_alias == query_alias && ref_contig.length == query_contig.length {
488 return Some(ref_contig);
489 }
490 }
491 }
492 }
493 }
494
495 None
496}
497
498#[cfg(test)]
499mod tests {
500 use super::*;
501
502 #[test]
503 fn test_jaccard_similarity() {
504 let a: HashSet<i32> = [1, 2, 3].into_iter().collect();
505 let b: HashSet<i32> = [2, 3, 4].into_iter().collect();
506
507 let similarity = jaccard_similarity(&a, &b);
508 assert!((similarity - 0.5).abs() < 0.001);
510
511 let empty1: HashSet<i32> = HashSet::new();
513 let empty2: HashSet<i32> = HashSet::new();
514 assert!((jaccard_similarity(&empty1, &empty2) - 0.0).abs() < 0.001);
515
516 assert!((jaccard_similarity(&a, &empty1) - 0.0).abs() < 0.001);
518
519 let c: HashSet<i32> = [1, 2, 3].into_iter().collect();
521 assert!((jaccard_similarity(&a, &c) - 1.0).abs() < 0.001);
522 }
523
524 #[test]
525 fn test_longest_increasing_subsequence() {
526 assert_eq!(longest_increasing_subsequence(&[1, 2, 3, 4, 5]), 5);
527 assert_eq!(longest_increasing_subsequence(&[5, 4, 3, 2, 1]), 1);
528 assert_eq!(longest_increasing_subsequence(&[1, 3, 2, 4, 5]), 4);
529 assert_eq!(longest_increasing_subsequence(&[]), 0);
530 }
531
532 #[test]
533 fn test_composite_score_never_exceeds_one() {
534 use crate::core::contig::Contig;
535 use crate::core::reference::KnownReference;
536 use crate::core::types::{Assembly, ReferenceSource};
537
538 let ref_contigs = vec![
540 Contig::new("chr1", 1000),
541 Contig::new("chr2", 2000),
542 Contig::new("chr3", 3000),
543 ];
544 let reference = KnownReference::new(
545 "test_ref",
546 "Test Reference",
547 Assembly::Grch38,
548 ReferenceSource::Custom("test".to_string()),
549 )
550 .with_contigs(ref_contigs);
551
552 let query_no_md5 = QueryHeader::new(vec![
554 Contig::new("chr1", 1000),
555 Contig::new("chr2", 2000),
556 Contig::new("chr3", 3000),
557 ]);
558
559 let score_no_md5 = MatchScore::calculate(&query_no_md5, &reference);
560 assert!(
561 score_no_md5.composite <= 1.0,
562 "Composite score without MD5 should not exceed 1.0, got {}",
563 score_no_md5.composite
564 );
565 assert!(
566 score_no_md5.composite >= 0.0,
567 "Composite score should not be negative, got {}",
568 score_no_md5.composite
569 );
570
571 let query_with_md5 = QueryHeader::new(vec![
573 Contig::new("chr1", 1000).with_md5("abc123"),
574 Contig::new("chr2", 2000).with_md5("def456"),
575 Contig::new("chr3", 3000).with_md5("ghi789"),
576 ]);
577
578 let score_with_md5 = MatchScore::calculate(&query_with_md5, &reference);
579 assert!(
580 score_with_md5.composite <= 1.0,
581 "Composite score with MD5 should not exceed 1.0, got {}",
582 score_with_md5.composite
583 );
584 assert!(
585 score_with_md5.composite >= 0.0,
586 "Composite score should not be negative, got {}",
587 score_with_md5.composite
588 );
589 }
590
591 #[test]
592 fn test_composite_score_with_custom_weights_never_exceeds_one() {
593 use crate::core::contig::Contig;
594 use crate::core::reference::KnownReference;
595 use crate::core::types::{Assembly, ReferenceSource};
596 use crate::matching::engine::ScoringWeights;
597
598 let ref_contigs = vec![Contig::new("chr1", 1000), Contig::new("chr2", 2000)];
599 let reference = KnownReference::new(
600 "test_ref",
601 "Test Reference",
602 Assembly::Grch38,
603 ReferenceSource::Custom("test".to_string()),
604 )
605 .with_contigs(ref_contigs);
606
607 let query = QueryHeader::new(vec![Contig::new("chr1", 1000), Contig::new("chr2", 2000)]);
608
609 let weight_configs = vec![
611 ScoringWeights {
612 contig_match: 0.7,
613 coverage: 0.2,
614 order: 0.1,
615 conflict_penalty: 0.1,
616 },
617 ScoringWeights {
618 contig_match: 0.5,
619 coverage: 0.3,
620 order: 0.2,
621 conflict_penalty: 0.0,
622 },
623 ScoringWeights {
624 contig_match: 1.0,
625 coverage: 1.0,
626 order: 1.0,
627 conflict_penalty: 0.5,
628 },
629 ];
630
631 for weights in weight_configs {
632 let score = MatchScore::calculate_with_weights(&query, &reference, &weights);
633 assert!(
634 score.composite <= 1.0,
635 "Composite score should not exceed 1.0 with weights {:?}, got {}",
636 weights,
637 score.composite
638 );
639 assert!(
640 score.composite >= 0.0,
641 "Composite score should not be negative with weights {:?}, got {}",
642 weights,
643 score.composite
644 );
645 }
646 }
647
648 #[test]
655 fn test_alias_match_query_name_to_ref_name() {
656 use crate::core::contig::Contig;
657 use crate::core::reference::KnownReference;
658 use crate::core::types::{Assembly, ReferenceSource};
659
660 let ref_contigs = vec![
662 Contig::new("chr1", 248_956_422),
663 Contig::new("chr2", 242_193_529),
664 ];
665 let reference = KnownReference::new(
666 "test_ref",
667 "Test Reference",
668 Assembly::Grch38,
669 ReferenceSource::Custom("test".to_string()),
670 )
671 .with_contigs(ref_contigs);
672
673 let query = QueryHeader::new(vec![
674 Contig::new("chr1", 248_956_422),
675 Contig::new("chr2", 242_193_529),
676 ]);
677
678 let score = MatchScore::calculate(&query, &reference);
679 assert!(
680 (score.name_length_jaccard - 1.0).abs() < 0.001,
681 "Direct name match should have jaccard = 1.0, got {}",
682 score.name_length_jaccard
683 );
684 }
685
686 #[test]
687 fn test_alias_match_query_name_to_ref_alias() {
688 use crate::core::contig::Contig;
689 use crate::core::reference::KnownReference;
690 use crate::core::types::{Assembly, ReferenceSource};
691
692 let ref_contigs = vec![
694 Contig::new("NC_000001.11", 248_956_422)
695 .with_aliases(vec!["chr1".to_string(), "1".to_string()]),
696 Contig::new("NC_000002.12", 242_193_529)
697 .with_aliases(vec!["chr2".to_string(), "2".to_string()]),
698 ];
699 let reference = KnownReference::new(
700 "test_ncbi_ref",
701 "Test NCBI Reference",
702 Assembly::Grch38,
703 ReferenceSource::Custom("test".to_string()),
704 )
705 .with_contigs(ref_contigs);
706
707 let query = QueryHeader::new(vec![
709 Contig::new("chr1", 248_956_422),
710 Contig::new("chr2", 242_193_529),
711 ]);
712
713 let score = MatchScore::calculate(&query, &reference);
714 assert!(
715 (score.name_length_jaccard - 1.0).abs() < 0.001,
716 "Query name matching ref alias should have jaccard = 1.0, got {}",
717 score.name_length_jaccard
718 );
719 }
720
721 #[test]
722 fn test_alias_match_query_alias_to_ref_name() {
723 use crate::core::contig::Contig;
724 use crate::core::reference::KnownReference;
725 use crate::core::types::{Assembly, ReferenceSource};
726
727 let ref_contigs = vec![
729 Contig::new("chr1", 248_956_422),
730 Contig::new("chr2", 242_193_529),
731 ];
732 let reference = KnownReference::new(
733 "test_ucsc_ref",
734 "Test UCSC Reference",
735 Assembly::Grch38,
736 ReferenceSource::Custom("test".to_string()),
737 )
738 .with_contigs(ref_contigs);
739
740 let query = QueryHeader::new(vec![
742 Contig::new("NC_000001.11", 248_956_422)
743 .with_aliases(vec!["chr1".to_string(), "1".to_string()]),
744 Contig::new("NC_000002.12", 242_193_529)
745 .with_aliases(vec!["chr2".to_string(), "2".to_string()]),
746 ]);
747
748 let score = MatchScore::calculate(&query, &reference);
749 assert!(
750 (score.name_length_jaccard - 1.0).abs() < 0.001,
751 "Query alias matching ref name should have jaccard = 1.0, got {}",
752 score.name_length_jaccard
753 );
754 }
755
756 #[test]
757 fn test_alias_match_query_alias_to_ref_alias() {
758 use crate::core::contig::Contig;
759 use crate::core::reference::KnownReference;
760 use crate::core::types::{Assembly, ReferenceSource};
761
762 let ref_contigs = vec![
765 Contig::new("NC_000001.11", 248_956_422)
766 .with_aliases(vec!["chr1".to_string(), "CM000663.2".to_string()]),
767 Contig::new("NC_000002.12", 242_193_529)
768 .with_aliases(vec!["chr2".to_string(), "CM000664.2".to_string()]),
769 ];
770 let reference = KnownReference::new(
771 "test_ncbi_ref_1",
772 "Test NCBI Reference 1",
773 Assembly::Grch38,
774 ReferenceSource::Custom("test".to_string()),
775 )
776 .with_contigs(ref_contigs);
777
778 let query = QueryHeader::new(vec![
780 Contig::new("CM000663.2", 248_956_422)
781 .with_aliases(vec!["chr1".to_string(), "NC_000001.11".to_string()]),
782 Contig::new("CM000664.2", 242_193_529)
783 .with_aliases(vec!["chr2".to_string(), "NC_000002.12".to_string()]),
784 ]);
785
786 let score = MatchScore::calculate(&query, &reference);
787 assert!(
788 (score.name_length_jaccard - 1.0).abs() < 0.001,
789 "Query alias matching ref alias should have jaccard = 1.0, got {}",
790 score.name_length_jaccard
791 );
792 }
793
794 #[test]
799 fn test_perfect_name_length_match_no_md5_gets_high_score() {
800 use crate::core::contig::Contig;
801 use crate::core::reference::KnownReference;
802 use crate::core::types::{Assembly, ReferenceSource};
803
804 let ref_contigs = vec![
806 Contig::new("chr1", 248_956_422),
807 Contig::new("chr2", 242_193_529),
808 Contig::new("chr3", 198_295_559),
809 ];
810 let reference = KnownReference::new(
811 "test_ref",
812 "Test Reference",
813 Assembly::Grch38,
814 ReferenceSource::Custom("test".to_string()),
815 )
816 .with_contigs(ref_contigs);
817
818 let query = QueryHeader::new(vec![
820 Contig::new("chr1", 248_956_422),
821 Contig::new("chr2", 242_193_529),
822 Contig::new("chr3", 198_295_559),
823 ]);
824
825 let score = MatchScore::calculate(&query, &reference);
826
827 assert_eq!(
829 score.exact_matches, 0,
830 "No exact matches (no MD5 available)"
831 );
832 assert_eq!(
833 score.name_length_matches, 3,
834 "All 3 contigs should be name+length matches"
835 );
836 assert_eq!(score.md5_conflicts, 0, "No conflicts");
837 assert_eq!(score.unmatched, 0, "No unmatched");
838
839 assert!(
841 (score.match_quality - 1.0).abs() < 0.001,
842 "Contig match score should be 1.0, got {}",
843 score.match_quality
844 );
845
846 assert!(
848 (score.coverage_score - 1.0).abs() < 0.001,
849 "Coverage score should be 1.0, got {}",
850 score.coverage_score
851 );
852
853 assert!(
856 score.composite > 0.9,
857 "100% name+length match with no MD5 should score > 90%, got {:.1}%",
858 score.composite * 100.0
859 );
860 }
861
862 #[test]
863 fn test_md5_conflict_gets_heavy_penalty() {
864 use crate::core::contig::Contig;
865 use crate::core::reference::KnownReference;
866 use crate::core::types::{Assembly, ReferenceSource};
867
868 let ref_contigs = vec![
870 Contig::new("chr1", 248_956_422).with_md5("abc123"),
871 Contig::new("chr2", 242_193_529).with_md5("def456"),
872 ];
873 let reference = KnownReference::new(
874 "test_ref",
875 "Test Reference",
876 Assembly::Grch38,
877 ReferenceSource::Custom("test".to_string()),
878 )
879 .with_contigs(ref_contigs);
880
881 let query = QueryHeader::new(vec![
883 Contig::new("chr1", 248_956_422).with_md5("DIFFERENT1"),
884 Contig::new("chr2", 242_193_529).with_md5("DIFFERENT2"),
885 ]);
886
887 let score = MatchScore::calculate(&query, &reference);
888
889 assert_eq!(score.exact_matches, 0, "No exact matches");
891 assert_eq!(score.name_length_matches, 0, "No name+length-only matches");
892 assert_eq!(score.md5_conflicts, 2, "Both contigs have MD5 conflicts");
893 assert_eq!(score.unmatched, 0, "No unmatched");
894
895 assert!(
897 (score.match_quality - 0.1).abs() < 0.001,
898 "Contig match score for conflicts should be 0.1, got {}",
899 score.match_quality
900 );
901
902 assert!(
904 score.composite < 0.2,
905 "MD5 conflicts should result in low score, got {:.1}%",
906 score.composite * 100.0
907 );
908 }
909
910 #[test]
911 fn test_mixed_match_types_weighted_correctly() {
912 use crate::core::contig::Contig;
913 use crate::core::reference::KnownReference;
914 use crate::core::types::{Assembly, ReferenceSource};
915
916 let ref_contigs = vec![
918 Contig::new("chr1", 248_956_422).with_md5("correct_md5_1"),
919 Contig::new("chr2", 242_193_529).with_md5("correct_md5_2"),
920 Contig::new("chr3", 198_295_559), ];
922 let reference = KnownReference::new(
923 "test_ref",
924 "Test Reference",
925 Assembly::Grch38,
926 ReferenceSource::Custom("test".to_string()),
927 )
928 .with_contigs(ref_contigs);
929
930 let query = QueryHeader::new(vec![
936 Contig::new("chr1", 248_956_422).with_md5("correct_md5_1"),
937 Contig::new("chr2", 242_193_529).with_md5("WRONG_MD5"),
938 Contig::new("chr3", 198_295_559),
939 Contig::new("chr4", 100_000), ]);
941
942 let score = MatchScore::calculate(&query, &reference);
943
944 assert_eq!(score.exact_matches, 1, "1 exact match (chr1)");
945 assert_eq!(score.name_length_matches, 1, "1 name+length match (chr3)");
946 assert_eq!(score.md5_conflicts, 1, "1 conflict (chr2)");
947 assert_eq!(score.unmatched, 1, "1 unmatched (chr4)");
948
949 let expected_match_score = (1.0 + 1.0 + 0.1) / 4.0;
951 assert!(
952 (score.match_quality - expected_match_score).abs() < 0.001,
953 "Expected match_quality = {}, got {}",
954 expected_match_score,
955 score.match_quality
956 );
957 }
958
959 #[test]
960 fn test_classify_contig_match_exact() {
961 use crate::core::contig::Contig;
962 use crate::core::reference::KnownReference;
963 use crate::core::types::{Assembly, ReferenceSource};
964
965 let ref_contigs = vec![Contig::new("chr1", 1000).with_md5("abc123")];
966 let reference = KnownReference::new(
967 "test",
968 "Test",
969 Assembly::Grch38,
970 ReferenceSource::Custom("test".to_string()),
971 )
972 .with_contigs(ref_contigs);
973
974 let query_contig = Contig::new("chr1", 1000).with_md5("abc123");
975 assert_eq!(
976 classify_contig_match(&query_contig, &reference),
977 ContigMatchType::Exact
978 );
979 }
980
981 #[test]
982 fn test_classify_contig_match_no_md5() {
983 use crate::core::contig::Contig;
984 use crate::core::reference::KnownReference;
985 use crate::core::types::{Assembly, ReferenceSource};
986
987 let ref_contigs = vec![Contig::new("chr1", 1000)]; let reference = KnownReference::new(
989 "test",
990 "Test",
991 Assembly::Grch38,
992 ReferenceSource::Custom("test".to_string()),
993 )
994 .with_contigs(ref_contigs);
995
996 let query_contig = Contig::new("chr1", 1000); assert_eq!(
998 classify_contig_match(&query_contig, &reference),
999 ContigMatchType::NameLengthNoMd5
1000 );
1001 }
1002
1003 #[test]
1004 fn test_classify_contig_match_md5_conflict() {
1005 use crate::core::contig::Contig;
1006 use crate::core::reference::KnownReference;
1007 use crate::core::types::{Assembly, ReferenceSource};
1008
1009 let ref_contigs = vec![Contig::new("chr1", 1000).with_md5("abc123")];
1010 let reference = KnownReference::new(
1011 "test",
1012 "Test",
1013 Assembly::Grch38,
1014 ReferenceSource::Custom("test".to_string()),
1015 )
1016 .with_contigs(ref_contigs);
1017
1018 let query_contig = Contig::new("chr1", 1000).with_md5("DIFFERENT");
1019 assert_eq!(
1020 classify_contig_match(&query_contig, &reference),
1021 ContigMatchType::Md5Conflict
1022 );
1023 }
1024
1025 #[test]
1026 fn test_classify_contig_match_unmatched() {
1027 use crate::core::contig::Contig;
1028 use crate::core::reference::KnownReference;
1029 use crate::core::types::{Assembly, ReferenceSource};
1030
1031 let ref_contigs = vec![Contig::new("chr1", 1000)];
1032 let reference = KnownReference::new(
1033 "test",
1034 "Test",
1035 Assembly::Grch38,
1036 ReferenceSource::Custom("test".to_string()),
1037 )
1038 .with_contigs(ref_contigs);
1039
1040 let query_contig = Contig::new("chr2", 1000);
1042 assert_eq!(
1043 classify_contig_match(&query_contig, &reference),
1044 ContigMatchType::Unmatched
1045 );
1046
1047 let query_contig2 = Contig::new("chr1", 2000);
1049 assert_eq!(
1050 classify_contig_match(&query_contig2, &reference),
1051 ContigMatchType::Unmatched
1052 );
1053 }
1054}