1use crate::{
2 cli::HashAlgorithms,
3 hasher::NoHashHasher,
4 sketch::{Sketch, Stats},
5};
6use serde::{Deserialize, Serialize};
7use sourmash::signature::{Signature as SourmashSignature, SigsTrait};
8use std::hash::BuildHasherDefault;
9
10#[derive(Debug, Serialize, Deserialize, Clone)]
11pub struct Signature {
12 pub file_name: String,
13 pub sketches: Vec<Sketch>,
14 pub algorithm: HashAlgorithms,
15 pub kmer_size: u8,
16 pub max_hash: u64,
17}
18
19impl From<Signature> for SourmashSignature {
20 fn from(val: Signature) -> Self {
21 SourmashSignature::builder()
22 .hash_function(format!("{:?}", val.algorithm))
23 .filename(Some(val.file_name))
24 .email("".to_string())
25 .license("CC0".to_string())
26 .name(None)
27 .signatures(
28 val.sketches
29 .into_iter()
30 .map(|sketch| sketch.into_sourmash(val.max_hash))
31 .collect(),
32 )
33 .build()
34 }
35}
36
37impl From<SourmashSignature> for Signature {
38 fn from(sourmash_signature: SourmashSignature) -> Self {
39 let mut sketches = Vec::new();
40 let mut max_hash = None;
41 let mut kmer_size = None;
42 for sketch in sourmash_signature.sketches() {
43 match sketch {
44 sourmash::sketch::Sketch::MinHash(mash) => {
45 if let Some(max_hash) = max_hash {
46 if max_hash != mash.max_hash() {
47 panic!("Max hash of sketches is not equal");
48 }
49 } else {
50 max_hash = Some(mash.max_hash());
51 }
52
53 if let Some(kmer_size) = kmer_size {
54 if kmer_size != mash.ksize() as u8 {
55 panic!("Kmer size of sketches is not equal");
56 }
57 } else {
58 kmer_size = Some(mash.ksize() as u8);
59 }
60
61 let mut sketch = Sketch::new(
62 sourmash_signature.filename(),
63 mash.mins().len(),
64 mash.max_hash() as usize,
65 mash.ksize() as u8,
66 );
67 sketch.hashes = mash
68 .mins()
69 .iter()
70 .map(|x| (*x, None))
71 .collect::<std::collections::HashMap<
72 u64,
73 Option<Stats>,
74 BuildHasherDefault<NoHashHasher>,
75 >>();
76 sketches.push(sketch);
77 }
78 sourmash::sketch::Sketch::LargeMinHash(mash) => {
79 if let Some(max_hash) = max_hash {
80 if max_hash != mash.max_hash() {
81 panic!("Max hash of sketches is not equal");
82 }
83 } else {
84 max_hash = Some(mash.max_hash());
85 }
86
87 if let Some(kmer_size) = kmer_size {
88 if kmer_size != mash.ksize() as u8 {
89 panic!("Kmer size of sketches is not equal");
90 }
91 } else {
92 kmer_size = Some(mash.ksize() as u8);
93 }
94
95 let mut sketch = Sketch::new(
96 sourmash_signature.filename(),
97 mash.mins().len(),
98 mash.max_hash() as usize,
99 mash.ksize() as u8,
100 );
101 sketch.hashes = mash
102 .mins()
103 .iter()
104 .map(|x| (*x, None))
105 .collect::<std::collections::HashMap<
106 u64,
107 Option<Stats>,
108 BuildHasherDefault<NoHashHasher>,
109 >>();
110 sketches.push(sketch);
111 }
112 sourmash::sketch::Sketch::HyperLogLog(_) => {
113 unimplemented!("HyperLogLog sketches are not supported")
114 }
115 }
116 }
117 Signature {
118 file_name: sourmash_signature.filename(),
119 sketches,
120 algorithm: HashAlgorithms::Murmur3,
121 kmer_size: kmer_size.expect("No sketch with kmer_size found"),
122 max_hash: max_hash.expect("No sketch with max hash found"),
123 }
124 }
125}
126
127impl Signature {
128 pub fn collapse(&mut self) -> Sketch {
129 let mut sketch = Sketch::new(self.file_name.to_string(), 0, 0, self.kmer_size);
130 for old_sketch in self.sketches.drain(..) {
131 sketch.hashes.extend(old_sketch.hashes);
132 sketch.num_kmers += old_sketch.num_kmers;
133 sketch.max_kmers += old_sketch.max_kmers;
134 }
135 sketch
136 }
137}