ref_solver/core/
reference.rs1use serde::{Deserialize, Serialize};
2use std::collections::HashSet;
3
4use crate::core::contig::{detect_naming_convention, Contig, SequenceRole};
5use crate::core::types::{Assembly, NamingConvention, ReferenceId, ReferenceSource};
6use crate::utils::validation::compute_signature as compute_sig;
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct KnownReference {
11 pub id: ReferenceId,
13
14 pub display_name: String,
16
17 pub assembly: Assembly,
19
20 pub source: ReferenceSource,
22
23 pub naming_convention: NamingConvention,
25
26 #[serde(default, skip_serializing_if = "Option::is_none")]
28 pub download_url: Option<String>,
29
30 #[serde(default, skip_serializing_if = "Option::is_none")]
32 pub assembly_report_url: Option<String>,
33
34 pub contigs: Vec<Contig>,
36
37 #[serde(default, skip_serializing_if = "Option::is_none")]
39 pub description: Option<String>,
40
41 #[serde(default, skip_serializing_if = "Vec::is_empty")]
43 pub tags: Vec<String>,
44
45 #[serde(default, skip_serializing_if = "Vec::is_empty")]
48 pub contigs_missing_from_fasta: Vec<String>,
49
50 #[serde(skip)]
53 pub md5_set: HashSet<String>,
54
55 #[serde(skip)]
57 pub name_length_set: HashSet<(String, u64)>,
58
59 #[serde(skip)]
61 pub signature: Option<String>,
62}
63
64impl KnownReference {
65 pub fn new(
66 id: impl Into<String>,
67 display_name: impl Into<String>,
68 assembly: Assembly,
69 source: ReferenceSource,
70 ) -> Self {
71 Self {
72 id: ReferenceId::new(id),
73 display_name: display_name.into(),
74 assembly,
75 source,
76 naming_convention: NamingConvention::Mixed,
77 download_url: None,
78 assembly_report_url: None,
79 contigs: Vec::new(),
80 description: None,
81 tags: Vec::new(),
82 contigs_missing_from_fasta: Vec::new(),
83 md5_set: HashSet::new(),
84 name_length_set: HashSet::new(),
85 signature: None,
86 }
87 }
88
89 #[must_use]
90 pub fn with_contigs(mut self, contigs: Vec<Contig>) -> Self {
91 self.naming_convention = detect_naming_convention(&contigs);
92 self.contigs = contigs;
93 self.rebuild_indexes();
94 self
95 }
96
97 pub fn rebuild_indexes(&mut self) {
99 self.md5_set.clear();
100 self.name_length_set.clear();
101
102 for contig in &self.contigs {
103 if let Some(md5) = &contig.md5 {
104 self.md5_set.insert(md5.clone());
105 }
106 self.name_length_set
108 .insert((contig.name.clone(), contig.length));
109
110 for alias in &contig.aliases {
112 self.name_length_set.insert((alias.clone(), contig.length));
113 }
114 }
115
116 self.signature = self.compute_signature();
118 }
119
120 fn compute_signature(&self) -> Option<String> {
123 let sig = compute_sig(&self.md5_set);
124 if sig.is_empty() {
125 None
126 } else {
127 Some(sig)
128 }
129 }
130
131 #[must_use]
133 pub fn has_decoy(&self) -> bool {
134 self.contigs.iter().any(super::contig::Contig::is_decoy)
135 }
136
137 #[must_use]
139 pub fn has_alt(&self) -> bool {
140 self.contigs.iter().any(super::contig::Contig::is_alt)
141 }
142
143 #[must_use]
145 pub fn role_counts(&self) -> RoleCounts {
146 let mut counts = RoleCounts::default();
147 for contig in &self.contigs {
148 match contig.sequence_role {
149 SequenceRole::AssembledMolecule => counts.assembled_molecule += 1,
150 SequenceRole::AltScaffold => counts.alt_scaffold += 1,
151 SequenceRole::FixPatch => counts.fix_patch += 1,
152 SequenceRole::NovelPatch => counts.novel_patch += 1,
153 SequenceRole::UnlocalizedScaffold => counts.unlocalized_scaffold += 1,
154 SequenceRole::UnplacedScaffold => counts.unplaced_scaffold += 1,
155 SequenceRole::Unknown => counts.unknown += 1,
156 }
157 }
158 counts
159 }
160}
161
162#[derive(Debug, Clone, Default, Serialize, Deserialize)]
164pub struct RoleCounts {
165 pub assembled_molecule: usize,
166 pub alt_scaffold: usize,
167 pub fix_patch: usize,
168 pub novel_patch: usize,
169 pub unlocalized_scaffold: usize,
170 pub unplaced_scaffold: usize,
171 pub unknown: usize,
172}