1use std::arch::x86_64::*;
2use std::collections::HashSet;
3use std::path::PathBuf;
4
5use serde::{Deserialize, Serialize};
6use t1ha;
7
/// Hashes exactly one machine word of input with an add/shift/xor mixer.
///
/// `bytes` is reinterpreted as a native-endian `usize` and scrambled with
/// wrapping arithmetic, so the result depends on the target's pointer width
/// and byte order.
///
/// # Panics
///
/// Panics if `bytes.len() != size_of::<usize>()` (8 bytes on 64-bit targets).
#[inline]
pub fn mm_hash(bytes: &[u8]) -> usize {
    let word: [u8; std::mem::size_of::<usize>()] = bytes
        .try_into()
        .expect("mm_hash: input must be exactly size_of::<usize>() bytes");
    let mut key = usize::from_ne_bytes(word);

    // The bitwise NOT covers the whole sum: !(key + (key << 21)).
    key = !(key.wrapping_add(key << 21));
    key ^= key >> 24;
    // key * 265
    key = key.wrapping_add(key << 3).wrapping_add(key << 8);
    key ^= key >> 14;
    // key * 21
    key = key.wrapping_add(key << 2).wrapping_add(key << 4);
    key ^= key >> 28;
    key.wrapping_add(key << 31)
}
20
/// Scrambles a 64-bit value with an add/shift/xor integer mixer.
///
/// All additions wrap modulo 2^64. Every input is valid and the function
/// never panics, including in builds with overflow checks enabled.
///
/// NOTE(review): the first round here is `(!key) + (key << 21)`, whereas
/// `mm_hash` and `mm_hash64_avx2` in this file compute `!(key + (key << 21))`.
/// The scalar and AVX2 64-bit hashes therefore produce different values for
/// the same input; confirm which form is intended before unifying them.
#[inline]
pub fn mm_hash64(kmer: u64) -> u64 {
    let mut key = kmer;
    key = (!key).wrapping_add(key << 21);
    key ^= key >> 24;
    // key * 265
    key = key.wrapping_add(key << 3).wrapping_add(key << 8);
    key ^= key >> 14;
    // key * 21
    key = key.wrapping_add(key << 2).wrapping_add(key << 4);
    key ^= key >> 28;
    key.wrapping_add(key << 31)
}
33
/// Applies the add/shift/xor mixer to four 64-bit lanes at once.
///
/// Each 64-bit lane of `kmer` is hashed independently; lane additions wrap
/// modulo 2^64. The first round computes `!(k + (k << 21))`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[inline]
#[target_feature(enable = "avx2")]
pub unsafe fn mm_hash64_avx2(kmer: __m256i) -> __m256i {
    // Round 1: h = !(k + (k << 21)). Comparing a vector with itself yields
    // all-ones lanes, and XOR with all-ones is a bitwise NOT.
    let summed = _mm256_add_epi64(kmer, _mm256_slli_epi64(kmer, 21));
    let all_ones = _mm256_cmpeq_epi64(summed, summed);
    let mut h = _mm256_xor_si256(summed, all_ones);

    // h ^= h >> 24
    h = _mm256_xor_si256(h, _mm256_srli_epi64(h, 24));

    // h = h + (h << 3) + (h << 8); both shifts read h before either add.
    let shl3 = _mm256_slli_epi64(h, 3);
    let shl8 = _mm256_slli_epi64(h, 8);
    h = _mm256_add_epi64(_mm256_add_epi64(h, shl3), shl8);

    // h ^= h >> 14
    h = _mm256_xor_si256(h, _mm256_srli_epi64(h, 14));

    // h = h + (h << 2) + (h << 4)
    let shl2 = _mm256_slli_epi64(h, 2);
    let shl4 = _mm256_slli_epi64(h, 4);
    h = _mm256_add_epi64(_mm256_add_epi64(h, shl2), shl4);

    // h ^= h >> 28
    h = _mm256_xor_si256(h, _mm256_srli_epi64(h, 28));

    // h += h << 31
    _mm256_add_epi64(h, _mm256_slli_epi64(h, 31))
}
61
/// Flat bag of command-line parameters.
///
/// In this file it is only read by `SketchParams::new` and `SketchDist::new`,
/// each of which copies the subset of fields it needs.
pub struct CliParams {
    /// NOTE(review): presumably selects the sub-command to run; not read in this file.
    pub mode: String,
    /// Input path; copied into `SketchParams::path`.
    pub path: PathBuf,
    /// Copied into `SketchDist::path_ref_sketch`.
    pub path_ref_sketch: PathBuf,
    /// Copied into `SketchDist::path_query_sketch`.
    pub path_query_sketch: PathBuf,
    /// Output path; copied into both `SketchParams` and `SketchDist`.
    pub out_file: PathBuf,

    /// K-mer size; copied into both `SketchParams` and `SketchDist`.
    pub ksize: u8,
    /// Hash seed; copied into `SketchParams::seed`.
    pub seed: u64,
    /// Hash method name; copied into `SketchParams::sketch_method`.
    pub sketch_method: String,
    /// Copied into `SketchParams::canonical`.
    pub canonical: bool,
    /// Copied into `SketchParams::device`.
    pub device: String,
    /// Copied into `SketchParams::scaled`; `Sketch::new` later derives its
    /// hash threshold as `u64::MAX / scaled`.
    pub scaled: u64,
    /// Copied into both `SketchParams::hv_d` and `SketchDist::hv_d`.
    pub hv_d: usize,
    /// Copied into `SketchParams::hv_quant_scale`.
    pub hv_quant_scale: f32,
    /// Copied into `SketchDist::ani_threshold`.
    pub ani_threshold: f32,
    /// Copied into `SketchParams::if_compressed`.
    pub if_compressed: bool,

    /// NOTE(review): presumably the worker thread count; not read in this file.
    pub threads: u8,
}
82
/// Settings that control how an input is sketched.
///
/// Usually built from [`CliParams`] through [`SketchParams::new`]; the
/// [`Default`] impl supplies stand-alone defaults.
pub struct SketchParams {
    /// Input path.
    pub path: PathBuf,
    /// Output path.
    pub out_file: PathBuf,
    /// Name of the hash method; forwarded to `Sketch::sketch_method`.
    pub sketch_method: String,
    /// Forwarded to `Sketch::canonical`.
    pub canonical: bool,
    /// Device name (defaults to `"cpu"`).
    pub device: String,
    /// K-mer size.
    pub ksize: u8,
    /// Hash seed.
    pub seed: u64,
    /// Scaling factor; `Sketch::new` keeps hashes below `u64::MAX / scaled`.
    pub scaled: u64,
    /// Forwarded to `Sketch::hv_d`.
    pub hv_d: usize,
    /// Forwarded to `Sketch::hv_quant_scale`.
    pub hv_quant_scale: f32,
    /// NOTE(review): presumably whether the output is written compressed; not read in this file.
    pub if_compressed: bool,
}

impl Default for SketchParams {
    /// Empty paths, `"t1ha2"` hashing on `"cpu"`, k = 21, seed 123,
    /// scaled 1500, `hv_d` 4096, quantisation scale 1.0, compression on.
    fn default() -> Self {
        Self {
            path: PathBuf::new(),
            out_file: PathBuf::new(),
            sketch_method: "t1ha2".to_string(),
            canonical: true,
            device: "cpu".to_string(),
            ksize: 21,
            seed: 123,
            scaled: 1500,
            hv_d: 4096,
            hv_quant_scale: 1.0,
            if_compressed: true,
        }
    }
}
114
115impl SketchParams {
116 pub fn new(params: &CliParams) -> SketchParams {
117 let mut new_sketch = SketchParams::default();
118 new_sketch.path = params.path.clone();
119 new_sketch.out_file = params.out_file.clone();
120 new_sketch.sketch_method = params.sketch_method.clone();
121 new_sketch.canonical = params.canonical.clone();
122 new_sketch.device = params.device.clone();
123 new_sketch.ksize = params.ksize;
124 new_sketch.seed = params.seed;
125 new_sketch.scaled = params.scaled;
126 new_sketch.hv_d = params.hv_d;
127 new_sketch.hv_quant_scale = params.hv_quant_scale;
128 new_sketch.if_compressed = params.if_compressed;
129 new_sketch
130 }
131}
132
/// In-memory sketch of one input file.
pub struct Sketch {
    /// Name of the file this sketch was built for.
    pub file_name: String,
    /// Hash method name consulted by the `insert_kmer*` methods.
    pub sketch_method: String,
    /// Canonical-k-mer flag copied from the sketch parameters.
    pub canonical: bool,
    /// K-mer size.
    pub ksize: u8,
    /// Hash seed.
    pub seed: u64,
    /// Scaling factor from which `threshold` is derived.
    pub scaled: u64,
    /// Only hashes strictly below this value are stored in `hash_set`.
    pub threshold: u64,
    /// Hashes retained so far.
    pub hash_set: HashSet<u64>,
    /// Quantisation scale copied from the sketch parameters.
    pub hv_quant_scale: f32,
    /// NOTE(review): presumably the bit width of quantised `hv` entries; not written in this file.
    pub hv_quant_bits: u8,
    /// NOTE(review): presumably the dimensionality of `hv`; confirm against the encoder.
    pub hv_d: usize,
    /// NOTE(review): presumably the hypervector encoding of the sketch; not filled in this file.
    pub hv: Vec<i16>,
    /// NOTE(review): presumably the squared L2 norm of `hv`; not computed in this file.
    pub hv_l2_norm_sq: i32,
}

impl Default for Sketch {
    /// Empty sketch with k = 21, seed 123, scaled 2000 and the matching
    /// threshold `u64::MAX / 2000`.
    ///
    /// The default method name `"xxh3"` is not one of the names matched by the
    /// `insert_kmer*` methods, so those fall through to their default arm.
    fn default() -> Self {
        const DEFAULT_SCALED: u64 = 2000;

        Self {
            file_name: String::new(),
            sketch_method: "xxh3".to_string(),
            canonical: true,
            ksize: 21,
            seed: 123,
            scaled: DEFAULT_SCALED,
            threshold: u64::MAX / DEFAULT_SCALED,
            hash_set: HashSet::new(),
            hv_quant_scale: 1.0,
            hv_quant_bits: 0,
            hv_d: 4096,
            hv: Vec::new(),
            hv_l2_norm_sq: 0,
        }
    }
}
168
169impl Sketch {
170 pub fn new(file: String, params: &SketchParams) -> Sketch {
171 let mut new_sketch = Sketch::default();
172 new_sketch.file_name = file;
173 new_sketch.sketch_method = params.sketch_method.clone();
174 new_sketch.canonical = params.canonical;
175 new_sketch.ksize = params.ksize;
176 new_sketch.seed = params.seed;
177 new_sketch.scaled = params.scaled;
178 new_sketch.hv_d = params.hv_d;
179 new_sketch.hv_quant_scale = params.hv_quant_scale;
180 new_sketch.threshold = u64::MAX / params.scaled;
181 new_sketch
182 }
183
184 pub fn insert_kmer(&mut self, kmer: &[u8]) {
185 let h = match self.sketch_method.as_str() {
186 "t1ha2" => t1ha::t1ha2_atonce(kmer, self.seed),
187 "mmhash" => mm_hash(kmer) as u64,
188 _ => t1ha::t1ha2_atonce(kmer, self.seed),
189 };
190
191 if h < self.threshold {
192 self.hash_set.insert(h);
193 }
194 }
195
196 pub fn insert_kmer_u64(&mut self, kmer: u64) {
197 let h = match self.sketch_method.as_str() {
198 "t1ha2_64" => t1ha::t1ha2_atonce(&kmer.to_be_bytes(), 123),
199 "mmhash64" => mm_hash64(kmer),
200 _ => t1ha::t1ha2_atonce(&kmer.to_be_bytes(), 123),
201 };
202
203 if h < self.threshold {
204 self.hash_set.insert(h);
205 }
206 }
207
208 pub unsafe fn insert_kmer_u64_avx2(&mut self, kmer: __m256i) {
209 let hash_256 = mm_hash64_avx2(kmer);
210
211 let h1 = _mm256_extract_epi64(hash_256, 0) as u64;
212 let h2 = _mm256_extract_epi64(hash_256, 1) as u64;
213 let h3 = _mm256_extract_epi64(hash_256, 2) as u64;
214 let h4 = _mm256_extract_epi64(hash_256, 3) as u64;
215
216 for h in [h1, h2, h3, h4] {
217 if h > 0 && h < self.threshold {
218 self.hash_set.insert(h);
219 }
220 }
221 }
222}
223
/// Serializable (serde) record of a sketch.
///
/// NOTE(review): the fields mirror a subset of `Sketch`; nothing in this file
/// converts between the two, so the exact mapping should be confirmed at the
/// read/write call sites.
#[derive(Serialize, Deserialize, PartialEq, Debug, Clone)]
pub struct FileSketch {
    /// K-mer size.
    pub ksize: u8,
    /// Scaling factor.
    pub scaled: u64,
    /// Canonical-k-mer flag.
    pub canonical: bool,
    /// Hash seed.
    pub seed: u64,
    /// NOTE(review): presumably the dimensionality of `hv`; confirm.
    pub hv_d: usize,
    /// NOTE(review): presumably the bit width of quantised `hv` entries; confirm.
    pub hv_quant_bits: u8,
    /// NOTE(review): presumably corresponds to `Sketch::hv_l2_norm_sq`
    /// despite the different name; confirm.
    pub hv_norm_2: i32,
    /// NOTE(review): presumably the source file name (`Sketch::file_name`); confirm.
    pub file_str: String,
    /// Vector payload of the sketch.
    pub hv: Vec<i16>,
}
236
/// Settings and results for comparing reference sketches against query sketches.
pub struct SketchDist {
    /// Path of the reference sketch.
    pub path_ref_sketch: PathBuf,
    /// Path of the query sketch.
    pub path_query_sketch: PathBuf,
    /// Output path.
    pub out_file: PathBuf,
    /// K-mer size.
    pub ksize: u8,
    /// NOTE(review): presumably the hypervector dimensionality; note the default
    /// here (1024) differs from the 4096 default used by the sketch types.
    pub hv_d: usize,
    /// ANI cut-off (defaults to 85.0).
    /// NOTE(review): presumably a percentage below which pairs are dropped; confirm.
    pub ani_threshold: f32,
    /// `((name, name), value)` result entries; starts empty.
    /// NOTE(review): presumably (reference, query) file names with their ANI; confirm.
    pub file_ani: Vec<((String, String), f32)>,
}

impl Default for SketchDist {
    /// Empty paths, k = 21, `hv_d` 1024, ANI threshold 85.0, no results.
    fn default() -> Self {
        Self {
            path_ref_sketch: PathBuf::new(),
            path_query_sketch: PathBuf::new(),
            out_file: PathBuf::new(),
            ksize: 21,
            hv_d: 1024,
            ani_threshold: 85.0,
            file_ani: Vec::new(),
        }
    }
}
260
261impl SketchDist {
262 pub fn new(params: &CliParams) -> SketchDist {
263 let mut new_dist = SketchDist::default();
264 new_dist.path_ref_sketch = params.path_ref_sketch.clone();
265 new_dist.path_query_sketch = params.path_query_sketch.clone();
266 new_dist.out_file = params.out_file.clone();
267 new_dist.ksize = params.ksize;
268 new_dist.hv_d = params.hv_d;
269 new_dist.ani_threshold = params.ani_threshold;
270 new_dist
271 }
272}