jsonm/
packer.rs

1extern crate regex;
2extern crate serde;
3extern crate serde_json;
4
5use self::regex::Regex;
6use self::serde::Serialize;
7use serde_json::Value;
8use std::collections::HashMap;
9use std::error::Error;
10use std::fmt;
11use std::vec::Vec;
12
/// First slot of the memoisation dictionary; the running index starts here and
/// wraps back to it once the dictionary is full.
/// NOTE(review): lower numbers are presumably kept free for the type tags below — confirm.
const MIN_DICT_INDEX: u64 = 3;
/// Type tag written as the first element of every packed array.
const TYPE_ARRAY: u32 = 0;
/// Type tag used when a non-array top-level result is wrapped with a sequence id.
const TYPE_VALUE: u32 = 1;
/// Type tag that replaces the array tag when a string is packed line by line.
const TYPE_STRING: u32 = 2;
/// Largest number of packed entries (keys plus values) an object may have and
/// still be memoised as a whole.
const MAX_PACK_COMPLEX_OBJECT_SIZE: usize = 12;
18
/// Options controlling a single pack operation.
///
/// Note that the derived `Default` yields `pack_string_depth == 0`, whereas
/// `PackOptions::new()` uses `-1`; pick the constructor that matches the
/// behaviour you need.
#[derive(Default, Debug, Clone)]
pub struct PackOptions {
    /// How many nesting levels deep string values are run through the string
    /// packer. A negative value disables string packing.
    pub pack_string_depth: i32,
    /// When `true`, no sequence id is appended to the packed result.
    pub no_sequence_id: bool,
}
24
/// One entry of the packer's memoisation dictionary.
#[derive(Default, Debug)]
pub struct MemoObject {
    /// Key under which the entry is registered in the packer's lookup maps.
    pub key: String,
    /// Textual form of the memoised value.
    pub value: String,
}
30
31impl PackOptions {
32    pub fn new() -> PackOptions {
33        PackOptions {
34            pack_string_depth: -1,
35            no_sequence_id: false,
36        }
37    }
38}
39
40/// Packer used to pack/compress json-like structures.
41#[derive(Default, Debug)]
42pub struct Packer {
43    memoised: HashMap<u64, MemoObject>,
44    memoised_map: HashMap<String, u64>,
45    memoised_object_map: HashMap<String, u64>,
46    memoised_index: u64,
47    sequence_id: i64,
48    max_dict_size: u64,
49}
50
/// Error produced when a value cannot be packed.
#[derive(Debug, Clone)]
pub struct PackerError {
    /// Short description of what went wrong.
    cause: String,
}
55
56impl fmt::Display for PackerError {
57    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
58        write!(f, "PackerError")
59    }
60}
61
62impl Error for PackerError {
63    fn description(&self) -> &str {
64        "Packer Error"
65    }
66
67    fn cause(&self) -> Option<&dyn Error> {
68        None
69    }
70}
71
72impl Packer {
73    pub fn new() -> Packer {
74        Packer {
75            sequence_id: -1,
76            max_dict_size: 2000,
77            memoised_index: MIN_DICT_INDEX,
78            ..Default::default()
79        }
80    }
81
82    /// Pack an JSON-like object.
83    pub fn pack<T>(&mut self, object: &T, options: &PackOptions) -> Result<Value, PackerError>
84    where
85        T: Serialize,
86    {
87        let json_object = json!(object);
88        let result = self.pack_object_or_value(&json_object, options.pack_string_depth);
89        if options.no_sequence_id {
90            return result;
91        }
92
93        match result {
94            Ok(mut value) => {
95                self.sequence_id += 1;
96                if !value.is_array() {
97                    return Ok(json!([json!(TYPE_VALUE), value, json!(self.sequence_id)]));
98                }
99
100                match value.as_array_mut() {
101                    Some(result) => {
102                        result.push(json!(self.sequence_id));
103                        Ok(json!(result))
104                    }
105                    None => Err(PackerError {
106                        cause: "unknown".to_owned(),
107                    }),
108                }
109            }
110            Err(err) => Err(err),
111        }
112    }
113
114    /// Pack a string. Efficiently packs multi-line strings and JSON strings.
115    /// When unpacked, a string is always returned again.
116    pub fn pack_string(
117        &mut self,
118        string_to_pack: &str,
119        options: &PackOptions,
120    ) -> Result<Value, PackerError> {
121        match serde_json::from_str(string_to_pack) {
122            Ok(value) => self.pack::<Value>(&value, options),
123            Err(_err) => {
124                let string = string_to_pack.to_owned();
125                let lines = string.lines();
126                let mut result_vec = Vec::new();
127                for line in lines {
128                    result_vec.push(json!(line));
129                }
130
131                let mut result = match self.pack(&json!(result_vec), options) {
132                    Ok(result) => result,
133                    Err(err) => return Err(err),
134                };
135
136                let vec = match result.as_array_mut() {
137                    Some(result) => result,
138                    None => {
139                        return Err(PackerError {
140                            cause: "unknown".to_owned(),
141                        })
142                    }
143                };
144
145                vec[0] = json!(TYPE_STRING);
146                Ok(json!(vec))
147            }
148        }
149    }
150    /// Reset the memoization dictionary, allowing consumption by new Unpacker instances.
151    pub fn reset(&mut self) {
152        self.memoised = HashMap::new();
153        self.memoised_map = HashMap::new();
154        self.memoised_object_map = HashMap::new();
155        self.memoised_index = MIN_DICT_INDEX;
156        self.sequence_id = -1;
157    }
158
159    /// Set the maximum dictionary size. Must match the dictionary size used by the unpacker.
160    /// Default - 2000
161    pub fn set_max_dict_size(&mut self, value: u64) {
162        self.max_dict_size = value;
163    }
164
165    fn pack_object_or_value(
166        &mut self,
167        object: &Value,
168        pack_string_depth: i32,
169    ) -> Result<Value, PackerError> {
170        if object.is_null() {
171            return Ok(Value::Null);
172        }
173
174        if object.is_array() {
175            let arr = match object.as_array() {
176                Some(arr) => arr,
177                None => {
178                    return Err(PackerError {
179                        cause: "unknown".to_string(),
180                    })
181                }
182            };
183            return Ok(self.pack_array(arr, pack_string_depth - 1));
184        }
185
186        if object.is_string() && pack_string_depth >= 0 {
187            let obj_str = match object.as_str() {
188                Some(arr) => arr,
189                None => {
190                    return Err(PackerError {
191                        cause: "unknown".to_string(),
192                    })
193                }
194            };
195            let options = PackOptions {
196                no_sequence_id: true,
197                pack_string_depth: 0,
198            };
199            return self.pack_string(obj_str, &options);
200        }
201
202        if !object.is_object() {
203            return Ok(self.pack_value(object));
204        }
205
206        return self.pack_object(object, pack_string_depth);
207    }
208
209    fn pack_object(
210        &mut self,
211        object: &Value,
212        pack_string_depth: i32,
213    ) -> Result<Value, PackerError> {
214        let obj = match object.as_object() {
215            Some(obj) => obj,
216            None => {
217                return Err(PackerError {
218                    cause: "unknown".to_owned(),
219                })
220            }
221        };
222        let mut results: Vec<Value> = Vec::new();
223        for (key, _value) in obj {
224            results.push(self.pack_value(&json!(key)));
225        }
226
227        for (_key, value) in obj {
228            if value.is_object() || value.is_array() {
229                match self.pack_object_or_value(value, pack_string_depth - 1) {
230                    Ok(object) => results.push(object),
231                    Err(err) => return Err(err),
232                };
233            } else if value.is_string() {
234                if pack_string_depth > 0 {
235                    let string = match value.as_str() {
236                        Some(s) => s,
237                        None => {
238                            return Err(PackerError {
239                                cause: "unknown".to_owned(),
240                            })
241                        }
242                    };
243                    let packed_string = match self.pack_string(
244                        string,
245                        &PackOptions {
246                            no_sequence_id: true,
247                            pack_string_depth: -1,
248                        },
249                    ) {
250                        Ok(packed_string) => packed_string,
251                        Err(err) => return Err(err),
252                    };
253                    results.push(packed_string);
254                } else {
255                    results.push(self.pack_value(value));
256                }
257            } else {
258                results.push(self.pack_value(value));
259            }
260        }
261
262        return Ok(self.try_pack_complex_object(object, results));
263    }
264
265    fn try_pack_complex_object(&mut self, object: &Value, results: Vec<Value>) -> Value {
266        if results.len() > MAX_PACK_COMPLEX_OBJECT_SIZE {
267            return json!(results);
268        }
269
270        for v in &results {
271            if !v.is_number() {
272                return json!(results);
273            }
274        }
275
276        let key = object.to_string();
277        if self.memoised_object_map.contains_key(&key) {
278            let val = self.memoised_object_map.get(&key);
279            return json!(val);
280        }
281
282        self.memoise(&object.to_string(), &key, true);
283
284        return json!(results);
285    }
286
287    fn pack_array(&mut self, object: &[Value], pack_string_depth: i32) -> Value {
288        let mut result: Vec<Value> = Vec::new();
289        result.push(json!(TYPE_ARRAY));
290        for val in object {
291            match self.pack_object_or_value(val, pack_string_depth) {
292                Ok(packed_object) => result.push(packed_object),
293                Err(_err) => {}
294            }
295        }
296
297        json!(result)
298    }
299
300    fn pack_value(&mut self, value: &Value) -> Value {
301        let string = value.to_string();
302        let str_value: &str = match value.as_str() {
303            Some(v) => v,
304            None => string.as_str(),
305        };
306
307        let map_key_string = "~".to_owned() + str_value;
308        let map_key = if value.is_string() {
309            map_key_string.as_str()
310        } else {
311            str_value
312        };
313
314        if self.memoised_map.contains_key(map_key) {
315            let val = self.memoised_map.get(map_key);
316            return json!(val);
317        }
318
319        if value.is_boolean() || value.is_null() {
320            self.memoise(str_value, map_key, false);
321            return json!(value);
322        }
323
324        if value.is_number() {
325            self.memoise(str_value, map_key, false);
326            return json!(str_value);
327        }
328
329        if value.is_string() {
330            self.memoise(str_value, map_key, false);
331            let re = Regex::new(r"^[0-9.]|^~").unwrap();
332            if re.is_match(str_value) {
333                return json!("~".to_owned() + str_value);
334            }
335        }
336
337        return json!(str_value);
338    }
339
340    fn memoise(&mut self, str_value: &str, map_key: &str, is_object: bool) {
341        match self.memoised.get(&self.memoised_index) {
342            Some(found_object) => {
343                let key = &found_object.key;
344                self.memoised_map.remove(key);
345                self.memoised_object_map.remove(key);
346            }
347            None => (),
348        }
349
350        if is_object {
351            self.memoised_object_map
352                .insert(map_key.to_owned(), self.memoised_index);
353        } else {
354            self.memoised_map
355                .insert(map_key.to_owned(), self.memoised_index);
356        }
357
358        self.memoised.insert(
359            self.memoised_index,
360            MemoObject {
361                key: map_key.to_owned(),
362                value: str_value.to_owned(),
363            },
364        );
365        self.memoised_index += 1;
366
367        if self.memoised_index >= (self.max_dict_size + MIN_DICT_INDEX) {
368            self.memoised_index = MIN_DICT_INDEX;
369        }
370    }
371}