1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
// Copyright 2023 The rust-ggstd authors. All rights reserved.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
use ggstd::bytes;
use ggstd::compress::flate;
/// Program entry point: runs the preset-dictionary example.
fn main() {
    example_dictionary();
}
/// Shows how a preset dictionary is used with the flate writer and reader.
///
/// A preset dictionary can improve the compression ratio. The price is that
/// the compressing and the decompressing side have to agree, ahead of time,
/// on exactly which dictionary is in use.
fn example_dictionary() {
    // The dictionary is nothing more than a sequence of bytes. While
    // compressing, the compressor tries to replace substrings of the input
    // with references to matches in the dictionary, so the dictionary should
    // hold only substrings that are likely to occur in the real data.
    let preset = b"<?xml version=\"1.0\"?><book><data><meta name=\"\" content=\"";

    // The payload should (although it does not have to) contain many
    // substrings that also appear in the dictionary.
    let document = r#"<?xml version="1.0"?>
<book>
<meta name="title" content="The Go Programming Language"/>
<meta name="authors" content="Alan Donovan and Brian Kernighan"/>
<meta name="published" content="2015-10-26"/>
<meta name="isbn" content="978-0134190440"/>
<data>...</data>
</book>
"#;

    // Destination for the compressed stream.
    let mut compressed = bytes::Buffer::new();

    // Step 1: compress the payload with the hand-crafted dictionary.
    // The inner scope ends the writer's mutable borrow of `compressed`.
    {
        let mut deflater =
            flate::Writer::new_dict(&mut compressed, flate::DEFAULT_COMPRESSION, preset).unwrap();
        let mut source = bytes::new_buffer_string(document);
        std::io::copy(&mut source, &mut deflater).unwrap();
        deflater.close().unwrap();
    }

    // Step 2: decompress with the very same dictionary. With a different
    // dictionary the input may look corrupted to the decompressor.
    println!("Decompressed output using the dictionary:");
    {
        // Read from the view returned by `bytes()`; the buffer itself is
        // handed to the reader in the next step.
        let mut stream = compressed.bytes();
        let mut inflater = flate::Reader::new_dict(&mut stream, preset);
        let mut restored = bytes::Buffer::new();
        std::io::copy(&mut inflater, &mut restored).unwrap();
        inflater.close().unwrap();

        let text = String::from_utf8_lossy(restored.bytes());
        println!("{}", text);
        println!();
    }

    // Step 3: decompress again, but with a dictionary of the same length in
    // which every byte is '#'. This makes it visible, approximately, which
    // parts of the output were taken from the preset dictionary.
    println!("Substrings matched by the dictionary are marked with #:");
    {
        let masked: Vec<u8> = std::iter::repeat(b'#').take(preset.len()).collect();
        let mut inflater = flate::Reader::new_dict(&mut compressed, &masked);
        let mut restored = bytes::Buffer::new();
        std::io::copy(&mut inflater, &mut restored).unwrap();
        inflater.close().unwrap();

        let text = String::from_utf8_lossy(restored.bytes());
        println!("{}", text);
    }

    // Output:
    // Decompressed output using the dictionary:
    // <?xml version="1.0"?>
    // <book>
    // <meta name="title" content="The Go Programming Language"/>
    // <meta name="authors" content="Alan Donovan and Brian Kernighan"/>
    // <meta name="published" content="2015-10-26"/>
    // <meta name="isbn" content="978-0134190440"/>
    // <data>...</data>
    // </book>
    //
    // Substrings matched by the dictionary are marked with #:
    // #####################
    // ######
    // ############title###########The Go Programming Language"/#
    // ############authors###########Alan Donovan and Brian Kernighan"/#
    // ############published###########2015-10-26"/#
    // ############isbn###########978-0134190440"/#
    // ######...</#####
    // </#####
}