1#[cfg(feature = "async")]
4pub mod r#async;
5
6pub mod codecs;
7pub mod container;
8pub mod crai;
9pub mod file_definition;
10pub mod fs;
11mod huffman;
12pub mod io;
13pub mod record;
14
15use md5::{Digest, Md5};
16
17pub use self::{file_definition::FileDefinition, record::Record};
18
19const MAGIC_NUMBER: [u8; 4] = *b"CRAM";
20const MD5_OUTPUT_SIZE: usize = 16;
21
22fn calculate_normalized_sequence_digest(mut sequence: &[u8]) -> [u8; MD5_OUTPUT_SIZE] {
24 const MD5_BLOCK_SIZE: usize = 64;
25 const CHUNK_SIZE: usize = 8 * MD5_BLOCK_SIZE;
26
27 let mut hasher = Md5::new();
28 let mut buf = [0; CHUNK_SIZE];
29
30 while !sequence.is_empty() {
31 let n = sequence
32 .iter()
33 .position(|b| !b.is_ascii_graphic() || b.is_ascii_lowercase())
34 .unwrap_or(sequence.len());
35
36 hasher.update(&sequence[..n]);
37 sequence = &sequence[n..];
38
39 loop {
41 let mut n = 0;
42
43 for (src, dst) in sequence
44 .iter()
45 .take_while(|b| b.is_ascii_lowercase())
46 .zip(&mut buf)
47 {
48 *dst = src.to_ascii_uppercase();
49 n += 1;
50 }
51
52 if n == 0 {
53 break;
54 }
55
56 hasher.update(&buf[..n]);
57 sequence = &sequence[n..];
58 }
59
60 let n = sequence
62 .iter()
63 .position(|b| b.is_ascii_graphic())
64 .unwrap_or(sequence.len());
65
66 sequence = &sequence[n..];
67 }
68
69 hasher.finalize().into()
70}
71
72#[cfg(test)]
73mod tests {
74 use super::*;
75
76 #[test]
77 fn test_calculate_normalized_sequence_digest() {
78 assert_eq!(
79 calculate_normalized_sequence_digest(b"ACGT"),
80 [
81 0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad, 0x13, 0x57, 0x22, 0xaa, 0x45, 0x91,
82 0x04, 0x3e
83 ]
84 );
85
86 assert_eq!(
87 calculate_normalized_sequence_digest(b" AC\tgt\n"),
88 [
89 0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad, 0x13, 0x57, 0x22, 0xaa, 0x45, 0x91,
90 0x04, 0x3e
91 ]
92 );
93
94 assert_eq!(
97 calculate_normalized_sequence_digest(b"ACGT ACGT ACGT\nacgt acgt acgt\n... 12345 !!!"),
98 [
99 0xdf, 0xab, 0xdb, 0xb3, 0x6e, 0x23, 0x9a, 0x6d, 0xa8, 0x89, 0x57, 0x84, 0x1f, 0x32,
100 0xb8, 0xe4
101 ]
102 );
103 }
104}