pingora_header_serde/
dict.rs

1// Copyright 2024 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Training to generate the zstd dictionary.
16
17use std::fs;
18use zstd::dict;
19
20/// Train the zstd dictionary from all the files under the given `dir_path`
21///
22/// The output will be the trained dictionary
23pub fn train<P: AsRef<std::path::Path>>(dir_path: P) -> Vec<u8> {
24    // TODO: check f is file, it can be dir
25    let files = fs::read_dir(dir_path)
26        .unwrap()
27        .filter_map(|entry| entry.ok().map(|f| f.path()));
28    dict::from_files(files, 64 * 1024 * 1024).unwrap()
29}
30
#[cfg(test)]
mod test {
    use super::*;
    use crate::resp_header_to_buf;
    use pingora_http::ResponseHeader;

    /// Name/value pairs appended, in this order, to the sample response header.
    const SAMPLE_HEADERS: [(&str, &str); 6] = [
        ("Date", "Thu, 23 Dec 2021 11:23:29 GMT"),
        ("Last-Modified", "Sat, 09 Oct 2021 22:41:34 GMT"),
        ("Connection", "keep-alive"),
        ("Vary", "Accept-encoding"),
        ("Content-Encoding", "gzip"),
        ("Access-Control-Allow-Origin", "*"),
    ];

    /// Train a dictionary from the `samples/test` directory of this crate.
    fn gen_test_dict() -> Vec<u8> {
        let sample_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("samples/test");
        train(sample_dir)
    }

    /// Build a `200` response header carrying every entry of `SAMPLE_HEADERS`.
    fn gen_test_header() -> ResponseHeader {
        let mut resp = ResponseHeader::build(200, None).unwrap();
        for (name, value) in SAMPLE_HEADERS {
            resp.append_header(name, value).unwrap();
        }
        resp
    }

    /// The dictionary-backed output must be smaller than both the raw header
    /// bytes and the output produced without a dictionary.
    #[test]
    fn test_ser_with_dict() {
        let with_dict = crate::HeaderSerde::new(Some(gen_test_dict()));
        let without_dict = crate::HeaderSerde::new(None);
        let resp = gen_test_header();

        let dict_len = with_dict.serialize(&resp).unwrap().len();
        let no_dict_len = without_dict.serialize(&resp).unwrap().len();

        let mut raw = Vec::new();
        let raw_len = resp_header_to_buf(&resp, &mut raw);

        assert!(dict_len < raw_len);
        assert!(dict_len < no_dict_len);
    }

    /// Deserializing with and without a dictionary must yield the same
    /// status and the same headers.
    #[test]
    fn test_deserialize_with_dict() {
        let with_dict = crate::HeaderSerde::new(Some(gen_test_dict()));
        let without_dict = crate::HeaderSerde::new(None);
        let resp = gen_test_header();

        let dict_bytes = with_dict.serialize(&resp).unwrap();
        let plain_bytes = without_dict.serialize(&resp).unwrap();

        let via_dict = with_dict.deserialize(&dict_bytes).unwrap();
        let via_plain = without_dict.deserialize(&plain_bytes).unwrap();

        assert_eq!(via_dict.status, via_plain.status);
        assert_eq!(via_dict.headers, via_plain.headers);
    }

    /// A serialize/deserialize round trip through the dictionary must
    /// reproduce the original status and headers.
    #[test]
    fn test_ser_de_with_dict() {
        let with_dict = crate::HeaderSerde::new(Some(gen_test_dict()));
        let original = gen_test_header();

        let bytes = with_dict.serialize(&original).unwrap();
        let restored = with_dict.deserialize(&bytes).unwrap();

        assert_eq!(original.status, restored.status);
        assert_eq!(original.headers, restored.headers);
    }
}