flate_dict/
flate-dict.rs

1// Copyright 2023 The rust-ggstd authors. All rights reserved.
2// Copyright 2016 The Go Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6use ggstd::bytes;
7use ggstd::compress::flate;
8
9fn main() {
10    example_dictionary();
11}
12
13/// A preset dictionary can be used to improve the compression ratio.
14/// The downside to using a dictionary is that the compressor and decompressor
15/// must agree in advance what dictionary to use.
16fn example_dictionary() {
17    // The dictionary is a string of bytes. When compressing some input data,
18    // the compressor will attempt to substitute substrings with matches found
19    // in the dictionary. As such, the dictionary should only contain substrings
20    // that are expected to be found in the actual data stream.
21    let dict = b"<?xml version=\"1.0\"?><book><data><meta name=\"\" content=\"";
22
23    // The data to compress should (but is not required to) contain frequent
24    // substrings that match those in the dictionary.
25    let data = r#"<?xml version="1.0"?>
26<book>
27    <meta name="title" content="The Go Programming Language"/>
28    <meta name="authors" content="Alan Donovan and Brian Kernighan"/>
29    <meta name="published" content="2015-10-26"/>
30    <meta name="isbn" content="978-0134190440"/>
31    <data>...</data>
32</book>
33"#;
34
35    let mut b = bytes::Buffer::new();
36
37    // Compress the data using the specially crafted dictionary.
38    {
39        let mut zw = flate::Writer::new_dict(&mut b, flate::DEFAULT_COMPRESSION, dict).unwrap();
40        let mut data_reader = bytes::new_buffer_string(data);
41        std::io::copy(&mut data_reader, &mut zw).unwrap();
42        zw.close().unwrap();
43    }
44
45    // The decompressor must use the same dictionary as the compressor.
46    // Otherwise, the input may appear as corrupted.
47    println!("Decompressed output using the dictionary:");
48    {
49        let mut data_reader = b.bytes();
50        let mut zr = flate::Reader::new_dict(&mut data_reader, dict);
51        let mut decompressed = bytes::Buffer::new();
52        std::io::copy(&mut zr, &mut decompressed).unwrap();
53        zr.close().unwrap();
54        println!("{}", String::from_utf8_lossy(decompressed.bytes()));
55        println!();
56    }
57
58    // Substitute all of the bytes in the dictionary with a '#' to visually
59    // demonstrate the approximate effectiveness of using a preset dictionary.
60    println!("Substrings matched by the dictionary are marked with #:");
61    {
62        let hash_dict = vec![b'#'; dict.len()];
63        let mut zr = flate::Reader::new_dict(&mut b, &hash_dict);
64        let mut decompressed = bytes::Buffer::new();
65        std::io::copy(&mut zr, &mut decompressed).unwrap();
66        zr.close().unwrap();
67        println!("{}", String::from_utf8_lossy(decompressed.bytes()));
68    }
69
70    // Output:
71    // Decompressed output using the dictionary:
72    // <?xml version="1.0"?>
73    // <book>
74    // 	<meta name="title" content="The Go Programming Language"/>
75    // 	<meta name="authors" content="Alan Donovan and Brian Kernighan"/>
76    // 	<meta name="published" content="2015-10-26"/>
77    // 	<meta name="isbn" content="978-0134190440"/>
78    // 	<data>...</data>
79    // </book>
80    //
81    // Substrings matched by the dictionary are marked with #:
82    // #####################
83    // ######
84    // 	############title###########The Go Programming Language"/#
85    // 	############authors###########Alan Donovan and Brian Kernighan"/#
86    // 	############published###########2015-10-26"/#
87    // 	############isbn###########978-0134190440"/#
88    // 	######...</#####
89    // </#####
90}