1use std::collections::HashMap;
2use std::fmt;
3use std::mem;
4
5use serde::de::{self, Deserializer, Visitor};
6use serde::ser::{SerializeStruct, Serializer};
7use serde::{Deserialize, Serialize};
8
9use crate::bail;
10use crate::errors::FinchResult;
11use crate::filtering::FilterParams;
12pub use crate::serialization::mash::{read_mash_file, write_mash_file};
13use crate::serialization::Sketch;
14use crate::sketch_schemes::{KmerCount, SketchParams};
15
16#[derive(Clone, Debug, Eq, PartialEq)]
17pub struct JsonSketch {
18 pub name: String,
19 pub seq_length: Option<u64>,
20 pub num_valid_kmers: Option<u64>,
21 pub comment: Option<String>,
22 pub filters: Option<HashMap<String, String>>,
23 pub hashes: Vec<KmerCount>,
24}
25
26impl JsonSketch {
27 pub fn new(
28 name: &str,
29 length: u64,
30 n_kmers: u64,
31 kmercounts: Vec<KmerCount>,
32 filters: &HashMap<String, String>,
33 ) -> Self {
34 JsonSketch {
35 name: String::from(name),
36 seq_length: Some(length),
37 num_valid_kmers: Some(n_kmers),
38 comment: Some(String::from("")),
39 filters: Some(filters.clone()),
40 hashes: kmercounts,
41 }
42 }
43
44 pub fn len(&self) -> usize {
45 self.hashes.len()
46 }
47
48 pub fn is_empty(&self) -> bool {
49 self.hashes.is_empty()
50 }
51}
52
53impl Serialize for JsonSketch {
54 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
55 where
56 S: Serializer,
57 {
58 let mut hash_list = Vec::with_capacity(self.hashes.len());
59 let mut kmer_list = Vec::with_capacity(self.hashes.len());
60 let mut count_list = Vec::with_capacity(self.hashes.len());
61 for hash in &self.hashes {
62 hash_list.push(hash.hash.to_string());
63 kmer_list.push(String::from_utf8(hash.kmer.clone()).unwrap());
64 count_list.push(hash.count);
65 }
66
67 let mut state = serializer.serialize_struct("Sketch", 8)?;
68 state.serialize_field("name", &self.name)?;
69 state.serialize_field("seqLength", &self.seq_length)?;
70 state.serialize_field("numValidKmers", &self.num_valid_kmers)?;
71 state.serialize_field("comment", &self.comment)?;
72 state.serialize_field("filters", &self.filters)?;
73 state.serialize_field("hashes", &hash_list)?;
74 state.serialize_field("kmers", &kmer_list)?;
75 state.serialize_field("counts", &count_list)?;
76 state.end()
77 }
78}
79
80impl<'de> Deserialize<'de> for JsonSketch {
81 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
82 where
83 D: Deserializer<'de>,
84 {
85 #[allow(non_snake_case)]
86 #[derive(Deserialize)]
87 struct BaseJsonSketch {
88 pub name: String,
89 pub seqLength: Option<u64>,
90 pub numValidKmers: Option<u64>,
91 pub comment: Option<String>,
92 pub filters: Option<HashMap<String, String>>,
93 hashes: Vec<QuotedU64>,
94 kmers: Option<Vec<String>>,
95 counts: Option<Vec<u32>>,
96 }
97
98 let mut jsketch = BaseJsonSketch::deserialize(deserializer)?;
99
100 let mut kmercount_list = Vec::with_capacity(jsketch.hashes.len());
101 for i in 0..jsketch.hashes.len() {
102 let hash = jsketch.hashes[i].0;
103 let kmer = match &mut jsketch.kmers {
104 Some(v) => mem::replace(&mut v[i], String::new()).into_bytes(),
105 None => Vec::new(),
106 };
107 let count = match &jsketch.counts {
108 Some(v) => v[i],
109 None => 1,
110 };
111 kmercount_list.push(KmerCount {
112 hash,
113 kmer,
114 count,
115 extra_count: count / 2,
116 label: None,
117 });
118 }
119 Ok(JsonSketch {
120 name: jsketch.name,
121 seq_length: jsketch.seqLength,
122 num_valid_kmers: jsketch.numValidKmers,
123 comment: jsketch.comment,
124 filters: jsketch.filters,
125 hashes: kmercount_list,
126 })
127 }
128}
129
130#[derive(Debug, Serialize, Deserialize)]
131pub struct MultiSketch {
132 pub kmer: u8,
133 pub alphabet: String,
134 #[serde(rename = "preserveCase")]
135 pub preserve_case: bool,
136 pub canonical: bool,
137 #[serde(rename = "sketchSize")]
138 pub sketch_size: u32,
139 #[serde(rename = "hashType")]
140 pub hash_type: String,
141 #[serde(rename = "hashBits")]
142 pub hash_bits: u16,
143 #[serde(rename = "hashSeed")]
144 pub hash_seed: u64,
145 pub scale: Option<f64>,
146 pub sketches: Vec<JsonSketch>,
147}
148
149impl MultiSketch {
150 pub fn get_params(&self) -> FinchResult<SketchParams> {
151 Ok(match (&*self.hash_type, self.scale) {
152 ("MurmurHash3_x64_128", None) => {
153 if self.hash_bits != 64 {
154 bail!(
155 "Multisketch has incompatible hash size ({} != 64)",
156 self.hash_bits
157 );
158 }
159 SketchParams::Mash {
160 kmers_to_sketch: self.sketch_size as usize,
161 final_size: self.sketch_size as usize,
162 no_strict: true,
163 kmer_length: self.kmer,
164 hash_seed: self.hash_seed,
165 }
166 }
167 ("MurmurHash3_x64_128", Some(scale)) => {
168 if self.hash_bits != 64 {
169 bail!(
170 "Multisketch has incompatible hash size ({} != 64)",
171 self.hash_bits
172 );
173 }
174 SketchParams::Scaled {
175 kmers_to_sketch: self.sketch_size as usize,
176 kmer_length: self.kmer,
177 scale,
178 hash_seed: self.hash_seed,
179 }
180 }
181 ("None", _) => SketchParams::AllCounts {
182 kmer_length: self.kmer,
183 },
184 (x, _) => bail!("{} sketch type is not supported", x),
185 })
186 }
187
188 pub fn from_sketches(sketches: &[Sketch]) -> FinchResult<Self> {
189 let json_sketches: Vec<JsonSketch> = sketches.iter().map(|x| (*x).clone().into()).collect();
190 let sketch_params = SketchParams::from_sketches(&sketches)?;
191 let (hash_type, hash_bits, hash_seed, scale) = sketch_params.hash_info();
194 Ok(MultiSketch {
195 alphabet: "ACGT".to_string(),
196 preserve_case: false,
197 canonical: true,
198
199 sketch_size: sketch_params.expected_size() as u32,
200 kmer: sketch_params.k(),
201 hash_type: hash_type.to_string(),
202 hash_bits,
203 hash_seed,
204 scale,
205 sketches: json_sketches,
206 })
207 }
208
209 pub fn to_sketches(&self) -> FinchResult<Vec<Sketch>> {
210 let empty_hashmap = HashMap::new();
211 let mut sketches = Vec::with_capacity(self.sketches.len());
212 let sketch_params = self.get_params()?;
213 for sketch in &self.sketches {
214 let filters = sketch.filters.as_ref().unwrap_or(&empty_hashmap);
215 let filter_params = FilterParams::from_serialized(filters)?;
216 sketches.push(Sketch {
217 name: sketch.name.clone(),
218 seq_length: sketch.seq_length.unwrap_or(0),
219 num_valid_kmers: sketch.num_valid_kmers.unwrap_or(0),
220 comment: sketch.comment.clone().unwrap_or_else(|| "".to_string()),
221 hashes: sketch.hashes.clone(),
222 filter_params,
223 sketch_params: sketch_params.clone(),
224 });
225 }
226 Ok(sketches)
227 }
228}
229
230struct QuotedU64(u64);
231
232impl<'de> Deserialize<'de> for QuotedU64 {
233 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
234 where
235 D: Deserializer<'de>,
236 {
237 struct QuotedU64Visitor;
238
239 impl<'de> Visitor<'de> for QuotedU64Visitor {
240 type Value = QuotedU64;
241
242 fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
243 formatter.write_str("usize as a json string")
244 }
245
246 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
247 where
248 E: de::Error,
249 {
250 value.parse().map(QuotedU64).map_err(de::Error::custom)
251 }
252 }
253
254 deserializer.deserialize_str(QuotedU64Visitor)
255 }
256}