jam_rs/
signature.rs

1use crate::{
2    cli::HashAlgorithms,
3    hasher::NoHashHasher,
4    sketch::{Sketch, Stats},
5};
6use serde::{Deserialize, Serialize};
7use sourmash::signature::{Signature as SourmashSignature, SigsTrait};
8use std::hash::BuildHasherDefault;
9
10#[derive(Debug, Serialize, Deserialize, Clone)]
11pub struct Signature {
12    pub file_name: String,
13    pub sketches: Vec<Sketch>,
14    pub algorithm: HashAlgorithms,
15    pub kmer_size: u8,
16    pub max_hash: u64,
17}
18
19impl From<Signature> for SourmashSignature {
20    fn from(val: Signature) -> Self {
21        SourmashSignature::builder()
22            .hash_function(format!("{:?}", val.algorithm))
23            .filename(Some(val.file_name))
24            .email("".to_string())
25            .license("CC0".to_string())
26            .name(None)
27            .signatures(
28                val.sketches
29                    .into_iter()
30                    .map(|sketch| sketch.into_sourmash(val.max_hash))
31                    .collect(),
32            )
33            .build()
34    }
35}
36
37impl From<SourmashSignature> for Signature {
38    fn from(sourmash_signature: SourmashSignature) -> Self {
39        let mut sketches = Vec::new();
40        let mut max_hash = None;
41        let mut kmer_size = None;
42        for sketch in sourmash_signature.sketches() {
43            match sketch {
44                sourmash::sketch::Sketch::MinHash(mash) => {
45                    if let Some(max_hash) = max_hash {
46                        if max_hash != mash.max_hash() {
47                            panic!("Max hash of sketches is not equal");
48                        }
49                    } else {
50                        max_hash = Some(mash.max_hash());
51                    }
52
53                    if let Some(kmer_size) = kmer_size {
54                        if kmer_size != mash.ksize() as u8 {
55                            panic!("Kmer size of sketches is not equal");
56                        }
57                    } else {
58                        kmer_size = Some(mash.ksize() as u8);
59                    }
60
61                    let mut sketch = Sketch::new(
62                        sourmash_signature.filename(),
63                        mash.mins().len(),
64                        mash.max_hash() as usize,
65                        mash.ksize() as u8,
66                    );
67                    sketch.hashes = mash
68                        .mins()
69                        .iter()
70                        .map(|x| (*x, None))
71                        .collect::<std::collections::HashMap<
72                            u64,
73                            Option<Stats>,
74                            BuildHasherDefault<NoHashHasher>,
75                        >>();
76                    sketches.push(sketch);
77                }
78                sourmash::sketch::Sketch::LargeMinHash(mash) => {
79                    if let Some(max_hash) = max_hash {
80                        if max_hash != mash.max_hash() {
81                            panic!("Max hash of sketches is not equal");
82                        }
83                    } else {
84                        max_hash = Some(mash.max_hash());
85                    }
86
87                    if let Some(kmer_size) = kmer_size {
88                        if kmer_size != mash.ksize() as u8 {
89                            panic!("Kmer size of sketches is not equal");
90                        }
91                    } else {
92                        kmer_size = Some(mash.ksize() as u8);
93                    }
94
95                    let mut sketch = Sketch::new(
96                        sourmash_signature.filename(),
97                        mash.mins().len(),
98                        mash.max_hash() as usize,
99                        mash.ksize() as u8,
100                    );
101                    sketch.hashes = mash
102                        .mins()
103                        .iter()
104                        .map(|x| (*x, None))
105                        .collect::<std::collections::HashMap<
106                            u64,
107                            Option<Stats>,
108                            BuildHasherDefault<NoHashHasher>,
109                        >>();
110                    sketches.push(sketch);
111                }
112                sourmash::sketch::Sketch::HyperLogLog(_) => {
113                    unimplemented!("HyperLogLog sketches are not supported")
114                }
115            }
116        }
117        Signature {
118            file_name: sourmash_signature.filename(),
119            sketches,
120            algorithm: HashAlgorithms::Murmur3,
121            kmer_size: kmer_size.expect("No sketch with kmer_size found"),
122            max_hash: max_hash.expect("No sketch with max hash found"),
123        }
124    }
125}
126
127impl Signature {
128    pub fn collapse(&mut self) -> Sketch {
129        let mut sketch = Sketch::new(self.file_name.to_string(), 0, 0, self.kmer_size);
130        for old_sketch in self.sketches.drain(..) {
131            sketch.hashes.extend(old_sketch.hashes);
132            sketch.num_kmers += old_sketch.num_kmers;
133            sketch.max_kmers += old_sketch.max_kmers;
134        }
135        sketch
136    }
137}