1use serde::{Deserialize, Serialize};
2use std::collections::HashSet;
3
4use crate::core::contig::{detect_naming_convention, Contig};
5use crate::core::types::NamingConvention;
6use crate::utils::validation::compute_signature;
7
/// Converts a count into an `f64` so it can take part in ratio arithmetic.
///
/// `f64` represents integers exactly only up to 2^53, so larger counts may be
/// rounded; that loss is accepted here, hence the lint allowance.
#[inline]
#[allow(clippy::cast_precision_loss)]
fn count_to_f64(count: usize) -> f64 {
    count as f64
}
16
/// A list of contigs together with lookup indexes derived from them.
///
/// Only `source`, `contigs` and `naming_convention` take part in
/// (de)serialization. The remaining fields are marked `#[serde(skip)]`, so a
/// freshly deserialized value has them at their `Default` (empty sets, no
/// signature) until [`QueryHeader::rebuild_indexes`] is called.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[non_exhaustive]
pub struct QueryHeader {
    /// Optional source label, set through `with_source`. Left out of the
    /// serialized form when `None`, and defaults to `None` when missing on
    /// input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,

    /// The contigs this header describes.
    pub contigs: Vec<Contig>,

    /// Naming convention of the contigs; `new` fills this from
    /// `detect_naming_convention`.
    pub naming_convention: NamingConvention,

    /// Every MD5 value present on a contig. Derived by `rebuild_indexes`; not
    /// serialized.
    #[serde(skip)]
    pub md5_set: HashSet<String>,

    /// Every `sha512t24u` value present on a contig. Derived by
    /// `rebuild_indexes`; not serialized.
    #[serde(skip)]
    pub sha512t24u_set: HashSet<String>,

    /// One `(name, length)` pair per contig. Derived by `rebuild_indexes`; not
    /// serialized.
    #[serde(skip)]
    pub name_length_set: HashSet<(String, u64)>,

    /// One `(alias, length)` pair per contig alias, using the length of the
    /// contig that owns the alias. Derived by `rebuild_indexes`; not
    /// serialized.
    #[serde(skip)]
    pub alias_length_set: HashSet<(String, u64)>,

    /// Set by `rebuild_indexes` from `compute_signature(&md5_set)` when that
    /// result is non-empty. Not serialized.
    #[serde(skip)]
    pub signature: Option<String>,
}
52
53impl QueryHeader {
54 #[must_use]
55 pub fn new(contigs: Vec<Contig>) -> Self {
56 let naming_convention = detect_naming_convention(&contigs);
57
58 let mut header = Self {
59 source: None,
60 contigs,
61 naming_convention,
62 md5_set: HashSet::new(),
63 sha512t24u_set: HashSet::new(),
64 name_length_set: HashSet::new(),
65 alias_length_set: HashSet::new(),
66 signature: None,
67 };
68
69 header.rebuild_indexes();
70 header
71 }
72
73 #[must_use]
74 pub fn with_source(mut self, source: impl Into<String>) -> Self {
75 self.source = Some(source.into());
76 self
77 }
78
79 pub fn rebuild_indexes(&mut self) {
80 self.md5_set.clear();
81 self.sha512t24u_set.clear();
82 self.name_length_set.clear();
83 self.alias_length_set.clear();
84
85 for contig in &self.contigs {
86 if let Some(md5) = &contig.md5 {
87 self.md5_set.insert(md5.clone());
88 }
89 if let Some(digest) = &contig.sha512t24u {
90 self.sha512t24u_set.insert(digest.clone());
91 }
92 self.name_length_set
94 .insert((contig.name.clone(), contig.length));
95
96 for alias in &contig.aliases {
98 self.alias_length_set.insert((alias.clone(), contig.length));
99 }
100 }
101
102 let sig = compute_signature(&self.md5_set);
104 if !sig.is_empty() {
105 self.signature = Some(sig);
106 }
107 }
108
109 #[must_use]
111 pub fn has_md5s(&self) -> bool {
112 !self.md5_set.is_empty()
113 }
114
115 #[must_use]
117 pub fn md5_coverage(&self) -> f64 {
118 if self.contigs.is_empty() {
119 return 0.0;
120 }
121 count_to_f64(self.contigs.iter().filter(|c| c.md5.is_some()).count())
122 / count_to_f64(self.contigs.len())
123 }
124
125 #[cfg(test)]
127 #[must_use]
128 pub fn primary_contigs(&self) -> Vec<&Contig> {
129 self.contigs
130 .iter()
131 .filter(|c| c.is_primary_chromosome() || c.is_mitochondrial())
132 .collect()
133 }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    /// Two UCSC-named contigs without any digests.
    fn ucsc_pair() -> Vec<Contig> {
        vec![Contig::new("chr1", 100), Contig::new("chr2", 200)]
    }

    /// Two UCSC-named contigs that each carry an MD5.
    fn md5_pair() -> Vec<Contig> {
        vec![
            Contig::new("chr1", 100).with_md5("abc123"),
            Contig::new("chr2", 200).with_md5("def456"),
        ]
    }

    /// Asserts that the header's MD5 coverage is within 0.01 of `expected`.
    fn assert_coverage(header: &QueryHeader, expected: f64) {
        let delta = (header.md5_coverage() - expected).abs();
        assert!(delta < 0.01);
    }

    #[test]
    fn test_query_header_new() {
        let header = QueryHeader::new(ucsc_pair());

        assert_eq!(header.contigs.len(), 2);
        assert_eq!(header.naming_convention, NamingConvention::Ucsc);
    }

    #[test]
    fn test_query_header_md5_set() {
        let header = QueryHeader::new(md5_pair());

        assert!(header.has_md5s());
        let md5s = &header.md5_set;
        assert_eq!(md5s.len(), 2);
        assert!(md5s.contains("abc123"));
        assert!(md5s.contains("def456"));
    }

    #[test]
    fn test_query_header_no_md5() {
        let header = QueryHeader::new(ucsc_pair());

        assert!(!header.has_md5s());
        assert_eq!(header.md5_set.len(), 0);
        assert_eq!(header.signature, None);
    }

    #[test]
    fn test_md5_coverage() {
        // Two of the four contigs carry an MD5.
        let half_covered = vec![
            Contig::new("chr1", 100).with_md5("abc123"),
            Contig::new("chr2", 200),
            Contig::new("chr3", 300).with_md5("ghi789"),
            Contig::new("chr4", 400),
        ];

        assert_coverage(&QueryHeader::new(half_covered), 0.5);
    }

    #[test]
    fn test_md5_coverage_empty() {
        assert_coverage(&QueryHeader::new(vec![]), 0.0);
    }

    #[test]
    fn test_md5_coverage_full() {
        assert_coverage(&QueryHeader::new(md5_pair()), 1.0);
    }

    #[test]
    fn test_name_length_set() {
        let header = QueryHeader::new(ucsc_pair());
        let pairs = &header.name_length_set;

        assert_eq!(pairs.len(), 2);
        assert!(pairs.contains(&(String::from("chr1"), 100)));
        assert!(pairs.contains(&(String::from("chr2"), 200)));
    }

    #[test]
    fn test_signature_computed() {
        let header = QueryHeader::new(md5_pair());

        let signature = header
            .signature
            .as_deref()
            .expect("a header with MD5s should have a signature");
        assert_eq!(signature.len(), 32);
    }

    #[test]
    fn test_signature_deterministic() {
        let chr1 = || Contig::new("chr1", 100).with_md5("abc");
        let chr2 = || Contig::new("chr2", 200).with_md5("def");

        // Same contigs, opposite order.
        let forward = QueryHeader::new(vec![chr1(), chr2()]);
        let reversed = QueryHeader::new(vec![chr2(), chr1()]);

        assert_eq!(forward.signature, reversed.signature);
    }

    #[test]
    fn test_with_source() {
        let header =
            QueryHeader::new(vec![Contig::new("chr1", 100)]).with_source("/path/to/file.bam");

        assert_eq!(header.source.as_deref(), Some("/path/to/file.bam"));
    }

    #[test]
    fn test_primary_contigs() {
        let header = QueryHeader::new(vec![
            Contig::new("chr1", 100),
            Contig::new("chr22", 200),
            Contig::new("chrX", 300),
            Contig::new("chrY", 400),
            Contig::new("chrM", 500),
            Contig::new("chr1_random", 600),
            Contig::new("chrUn_gl000220", 700),
        ]);

        // Expected: chr1, chr22, chrX, chrY and chrM; the last two are excluded.
        assert_eq!(header.primary_contigs().len(), 5);
    }

    #[test]
    fn test_detect_ucsc_naming() {
        let detected = QueryHeader::new(ucsc_pair()).naming_convention;
        assert_eq!(detected, NamingConvention::Ucsc);
    }

    #[test]
    fn test_detect_ncbi_naming() {
        let ncbi = vec![Contig::new("1", 100), Contig::new("2", 200)];
        let detected = QueryHeader::new(ncbi).naming_convention;
        assert_eq!(detected, NamingConvention::Ncbi);
    }

    #[test]
    fn test_detect_mixed_naming() {
        let mixed = vec![Contig::new("chr1", 100), Contig::new("2", 200)];
        let detected = QueryHeader::new(mixed).naming_convention;
        assert_eq!(detected, NamingConvention::Mixed);
    }
}