ref_solver/core/
reference.rs1use serde::{Deserialize, Serialize};
2use std::collections::HashSet;
3
4use crate::core::contig::{detect_naming_convention, Contig, SequenceRole};
5use crate::core::types::{Assembly, NamingConvention, ReferenceId, ReferenceSource};
6use crate::utils::validation::compute_signature as compute_sig;
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
10#[non_exhaustive]
11pub struct KnownReference {
12 pub id: ReferenceId,
14
15 pub display_name: String,
17
18 pub assembly: Assembly,
20
21 pub source: ReferenceSource,
23
24 pub naming_convention: NamingConvention,
26
27 #[serde(default, skip_serializing_if = "Option::is_none")]
29 pub download_url: Option<String>,
30
31 #[serde(default, skip_serializing_if = "Option::is_none")]
33 pub assembly_report_url: Option<String>,
34
35 pub contigs: Vec<Contig>,
37
38 #[serde(default, skip_serializing_if = "Option::is_none")]
40 pub description: Option<String>,
41
42 #[serde(default, skip_serializing_if = "Vec::is_empty")]
44 pub tags: Vec<String>,
45
46 #[serde(default, skip_serializing_if = "Vec::is_empty")]
49 pub contigs_missing_from_fasta: Vec<String>,
50
51 #[serde(skip)]
54 pub md5_set: HashSet<String>,
55
56 #[serde(skip)]
58 pub sha512t24u_set: HashSet<String>,
59
60 #[serde(skip)]
62 pub name_length_set: HashSet<(String, u64)>,
63
64 #[serde(skip)]
66 pub signature: Option<String>,
67}
68
69impl KnownReference {
70 pub fn new(
71 id: impl Into<String>,
72 display_name: impl Into<String>,
73 assembly: Assembly,
74 source: ReferenceSource,
75 ) -> Self {
76 Self {
77 id: ReferenceId::new(id),
78 display_name: display_name.into(),
79 assembly,
80 source,
81 naming_convention: NamingConvention::Mixed,
82 download_url: None,
83 assembly_report_url: None,
84 contigs: Vec::new(),
85 description: None,
86 tags: Vec::new(),
87 contigs_missing_from_fasta: Vec::new(),
88 md5_set: HashSet::new(),
89 sha512t24u_set: HashSet::new(),
90 name_length_set: HashSet::new(),
91 signature: None,
92 }
93 }
94
95 #[must_use]
96 pub fn with_contigs(mut self, contigs: Vec<Contig>) -> Self {
97 self.naming_convention = detect_naming_convention(&contigs);
98 self.contigs = contigs;
99 self.rebuild_indexes();
100 self
101 }
102
103 pub fn rebuild_indexes(&mut self) {
105 self.md5_set.clear();
106 self.sha512t24u_set.clear();
107 self.name_length_set.clear();
108
109 for contig in &self.contigs {
110 if let Some(md5) = &contig.md5 {
111 self.md5_set.insert(md5.clone());
112 }
113 if let Some(digest) = &contig.sha512t24u {
114 self.sha512t24u_set.insert(digest.clone());
115 }
116 self.name_length_set
118 .insert((contig.name.clone(), contig.length));
119
120 for alias in &contig.aliases {
122 self.name_length_set.insert((alias.clone(), contig.length));
123 }
124 }
125
126 self.signature = self.compute_signature();
128 }
129
130 fn compute_signature(&self) -> Option<String> {
133 let sig = compute_sig(&self.md5_set);
134 if sig.is_empty() {
135 None
136 } else {
137 Some(sig)
138 }
139 }
140
141 #[must_use]
143 pub fn has_decoy(&self) -> bool {
144 self.contigs.iter().any(super::contig::Contig::is_decoy)
145 }
146
147 #[must_use]
149 pub fn has_alt(&self) -> bool {
150 self.contigs.iter().any(super::contig::Contig::is_alt)
151 }
152
153 #[must_use]
155 pub fn role_counts(&self) -> RoleCounts {
156 let mut counts = RoleCounts::default();
157 for contig in &self.contigs {
158 match contig.sequence_role {
159 SequenceRole::AssembledMolecule => counts.assembled_molecule += 1,
160 SequenceRole::AltScaffold => counts.alt_scaffold += 1,
161 SequenceRole::FixPatch => counts.fix_patch += 1,
162 SequenceRole::NovelPatch => counts.novel_patch += 1,
163 SequenceRole::UnlocalizedScaffold => counts.unlocalized_scaffold += 1,
164 SequenceRole::UnplacedScaffold => counts.unplaced_scaffold += 1,
165 SequenceRole::Unknown => counts.unknown += 1,
166 }
167 }
168 counts
169 }
170}
171
172#[derive(Debug, Clone, Default, Serialize, Deserialize)]
174pub struct RoleCounts {
175 pub assembled_molecule: usize,
176 pub alt_scaffold: usize,
177 pub fix_patch: usize,
178 pub novel_patch: usize,
179 pub unlocalized_scaffold: usize,
180 pub unplaced_scaffold: usize,
181 pub unknown: usize,
182}