web_static_pack_packer/
file.rs

1//! File helpers. Contains [build_from_path] and [build_from_content] functions
2//! to create a [File] from fs / memory content.
3
4use crate::common::{cache_control::CacheControl, file::File};
5use anyhow::Error;
6use brotli::enc::BrotliEncoderParams;
7use flate2::{Compression, write::GzEncoder};
8use sha3::{Digest, Sha3_256};
9use std::{
10    fs,
11    io::{Cursor, Write},
12    path::Path,
13};
14
/// Options when preparing file in [build_from_path].
///
/// If not sure what to set here, use [Default].
#[derive(Debug)]
pub struct BuildFromPathOptions {
    /// Try adding gzipped version of file. If set to true, it may still not be
    /// added (ex. in case file is empty or gzipped version is not smaller than
    /// raw).
    pub use_gzip: bool,
    /// Try adding brotli version of file. If set to true, it may still not be
    /// added (ex. in case file is empty or brotli version is not smaller than
    /// raw).
    pub use_brotli: bool,

    /// Override `content-type` header for this file. If [None], `content-type`
    /// is guessed from file path.
    pub content_type_override: Option<String>,
    /// Override [CacheControl] for this file. If [None],
    /// [CacheControl::MaxCache] is used.
    pub cache_control_override: Option<CacheControl>,
}
impl Default for BuildFromPathOptions {
    /// Enables both gzip and brotli compression and sets no overrides.
    fn default() -> Self {
        Self {
            use_gzip: true,
            use_brotli: true,
            content_type_override: None,
            cache_control_override: None,
        }
    }
}
42
43/// Creates a [File] by reading file from fs, specified by `path`.
44///
45/// Inside file will be read, `content-type` determined
46/// from extension and then passed to [build_from_content].
47///
48/// # Examples
49///
50/// ```
51/// # use anyhow::{anyhow, Error};
52/// # use std::path::PathBuf;
53/// # use web_static_pack_packer::file::{build_from_path, BuildFromPathOptions};
54/// #
55/// # fn main() -> Result<(), Error> {
56/// #
57/// let file = build_from_path(
58///     &PathBuf::from(env!("CARGO_MANIFEST_DIR"))
59///         .parent()
60///         .ok_or_else(|| anyhow!("missing parent"))?
61///         .join("tests")
62///         .join("data")
63///         .join("vcard-personal-portfolio")
64///         .join("index.html"),
65///     &BuildFromPathOptions::default(),
66/// )?;
67/// assert_eq!(file.content_type, "text/html; charset=utf-8");
68/// #
69/// # Ok(())
70/// # }
71/// ```
72pub fn build_from_path(
73    path: &Path,
74    options: &BuildFromPathOptions,
75) -> Result<File, Error> {
76    // read content
77    let content = content_from_path(path)?;
78
79    // use user provided content type if set, otherwise guess from path
80    let content_type = if let Some(content_type) = &options.content_type_override {
81        content_type.clone()
82    } else {
83        content_type_from_path(path)
84    };
85
86    // pass to inner builder
87    let file = build_from_content(
88        content,
89        content_type,
90        &BuildFromContentOptions {
91            use_gzip: options.use_gzip,
92            use_brotli: options.use_brotli,
93            cache_control_override: options.cache_control_override,
94        },
95    );
96
97    Ok(file)
98}
99
/// Options when preparing file in [build_from_content].
///
/// If not sure what to set here, use [Default].
#[derive(Debug)]
pub struct BuildFromContentOptions {
    /// Try adding gzipped version of content. If set to true, it may still not
    /// be added (ex. in case content is empty or gzipped version is not
    /// smaller than raw).
    pub use_gzip: bool,
    /// Try adding brotli version of content. If set to true, it may still not
    /// be added (ex. in case content is empty or brotli version is not
    /// smaller than raw).
    pub use_brotli: bool,

    /// Override [CacheControl] for this file. If [None],
    /// [CacheControl::MaxCache] is used.
    pub cache_control_override: Option<CacheControl>,
}
impl Default for BuildFromContentOptions {
    /// Enables both gzip and brotli compression and sets no overrides.
    fn default() -> Self {
        Self {
            use_gzip: true,
            use_brotli: true,
            cache_control_override: None,
        }
    }
}
124
125/// Creates a [File] from provided raw content and `content-type`.
126///
127/// Inside compressed versions will be created (according to options), `ETag`
128/// calculated and [CacheControl] set.
129///
130/// When setting `content_type` remember to set charset for text files, eg.
131/// `text/plain; charset=utf-8`.
132///
133/// # Examples
134///
135/// ```
136/// # use anyhow::Error;
137/// # use std::path::PathBuf;
138/// # use web_static_pack_packer::file::{build_from_content, BuildFromContentOptions};
139/// #
140/// # fn main() -> Result<(), Error> {
141/// #
142/// let file = build_from_content(
143///     Box::new(*b"<html>Hello World!</html>"),
144///     "text/html; charset=utf-8".to_owned(),
145///     &BuildFromContentOptions::default(),
146/// );
147/// assert!(file.content_gzip.is_none()); // too short for gzip
148/// assert!(file.content_brotli.is_none()); // too short for gzip
149/// assert_eq!(&*file.content, b"<html>Hello World!</html>");
150/// assert_eq!(file.content_type, "text/html; charset=utf-8");
151/// #
152/// # Ok(())
153/// # }
154/// ```
155pub fn build_from_content(
156    content: Box<[u8]>,
157    content_type: String,
158    options: &BuildFromContentOptions,
159) -> File {
160    let content_gzip = if options.use_gzip {
161        content_gzip_from_content(&content)
162    } else {
163        None
164    };
165    let content_brotli = if options.use_brotli {
166        content_brotli_from_content(&content)
167    } else {
168        None
169    };
170
171    let etag = etag_from_content(&content);
172    let cache_control = if let Some(cache_control) = &options.cache_control_override {
173        *cache_control
174    } else {
175        // we assume, that content is "static" and provide max caching opportunity
176        CacheControl::MaxCache
177    };
178
179    File {
180        content,
181        content_gzip,
182        content_brotli,
183        content_type,
184        etag,
185        cache_control,
186    }
187}
188
189/// Builds content by reading given file.
190fn content_from_path(path: &Path) -> Result<Box<[u8]>, Error> {
191    let content = fs::read(path)?.into_boxed_slice();
192
193    Ok(content)
194}
195/// Builds gzip compressed version of `content`.
196///
197/// Returns [None] if there is no sense in having compressed version in `pack`
198/// (eg. compressed is larger than raw).
199fn content_gzip_from_content(content: &[u8]) -> Option<Box<[u8]>> {
200    // no sense in compressing empty files
201    if content.is_empty() {
202        return None;
203    }
204
205    let mut content_gzip = GzEncoder::new(Vec::new(), Compression::best());
206    content_gzip.write_all(content).unwrap();
207    let content_gzip = content_gzip.finish().unwrap().into_boxed_slice();
208
209    // if gzip is longer then original value - it makes no sense to store it
210    if content_gzip.len() >= content.len() {
211        return None;
212    }
213
214    Some(content_gzip)
215}
216/// Builds brotli compressed version of `content`.
217///
218/// Returns [None] if there is no sense in having compressed version in `pack`
219/// (eg. compressed is larger than raw).
220fn content_brotli_from_content(content: &[u8]) -> Option<Box<[u8]>> {
221    // no sense in compressing empty files
222    if content.is_empty() {
223        return None;
224    }
225
226    let mut content_cursor = Cursor::new(content);
227    let mut content_brotli = Vec::new();
228    let content_brotli_length = brotli::BrotliCompress(
229        &mut content_cursor,
230        &mut content_brotli,
231        &BrotliEncoderParams::default(),
232    )
233    .unwrap();
234    let content_brotli = content_brotli.into_boxed_slice();
235    assert!(content_brotli.len() == content_brotli_length);
236
237    // if brotli is longer then original value - it makes no sense to store it
238    if content_brotli.len() >= content.len() {
239        return None;
240    }
241
242    Some(content_brotli)
243}
244
245/// Guesses `content-type` from file path.
246///
247/// Only path is used, file content is not read. If file type cannot be guessed,
248/// returns "application/octet-stream". For text files (eg. plain, html, css,
249/// js, etc) it assumes utf-8 encoding.
250fn content_type_from_path(path: &Path) -> String {
251    let mut content_type = mime_guess::from_path(path)
252        .first_or_octet_stream()
253        .as_ref()
254        .to_owned();
255
256    if content_type.starts_with("text/") {
257        content_type.push_str("; charset=utf-8");
258    }
259    content_type
260}
261/// Calculates `ETag` header from file contents.
262fn etag_from_content(content: &[u8]) -> String {
263    let mut etag = Sha3_256::new();
264    etag.update(content);
265    let etag = etag.finalize();
266    let etag = format!("\"{:x}\"", &etag); // `ETag` as "quoted" hex sha3. Quote is required by standard
267    etag
268}
269
/// Unit tests for file building helpers.
#[cfg(test)]
mod test {
    use super::{
        BuildFromContentOptions, build_from_content, content_brotli_from_content,
        content_gzip_from_content, content_type_from_path, etag_from_content,
    };
    use crate::common::file::File;
    use std::path::{Path, PathBuf};
    use test_case::test_case;

    /// Builds a file from repetitive text with default options and checks
    /// that content, both compressed variants and content type match
    /// expected values.
    #[test]
    fn build_from_content_returns_expected() {
        let content_original = b"lorem ipsum lorem ipsum lorem ipsum lorem ipsum lorem ipsum";
        let content_type_original = "text/plain; charset=utf-8";

        let file = build_from_content(
            Box::new(*content_original),
            content_type_original.to_owned(),
            &BuildFromContentOptions::default(),
        );

        let File {
            content,
            content_gzip,
            content_brotli,
            content_type,
            // implementation dependent, not checked here
            // etag,
            // cache_control,
            ..
        } = file;
        assert_eq!(&*content, content_original);
        // expected bytes are exact encoder outputs for the input above
        // NOTE(review): presumably these depend on encoder versions - verify
        // when bumping compression dependencies
        assert_eq!(&*content_gzip.unwrap(), b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\x95\xc6\x41\x09\x00\x00\x08\x03\xc0\x2a\x2b\xe7\x43\xd8\x50\x14\xfb\x9b\x61\xbf\x63\x4d\x08\xd9\x7b\x02\x3d\x3f\x1e\x08\x7c\xb8\x3b\x00\x00\x00");
        assert_eq!(&*content_brotli.unwrap(), b"\x1b\x3a\x00\xf8\x1d\xa9\x53\x9f\xbb\x70\x9d\xc6\xf6\x06\xa7\xda\xe4\x1a\xa4\x6c\xae\x4e\x18\x15\x0b\x98\x56\x70\x03");
        assert_eq!(content_type, content_type_original);

        // implementation dependent, not checked here
        // assert_eq!(etag, "");
        // assert_eq!(cache_control, CacheControl::MaxCache);
    }

    /// Empty content must never produce compressed variants.
    #[test]
    fn empty_should_not_be_compressed() {
        assert!(content_gzip_from_content(&[]).is_none());
        assert!(content_brotli_from_content(&[]).is_none());
    }

    /// Gzip of repetitive text is kept and matches expected bytes.
    #[test]
    fn content_gzip_from_content_returns_expected() {
        assert_eq!(
            content_gzip_from_content(b"lorem ipsum lorem ipsum lorem ipsum lorem ipsum lorem ipsum").as_deref(),
            Some(b"\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\x95\xc6\x41\x09\x00\x00\x08\x03\xc0\x2a\x2b\xe7\x43\xd8\x50\x14\xfb\x9b\x61\xbf\x63\x4d\x08\xd9\x7b\x02\x3d\x3f\x1e\x08\x7c\xb8\x3b\x00\x00\x00".as_slice())
        );
    }

    /// Brotli of repetitive text is kept and matches expected bytes.
    #[test]
    fn content_brotli_from_content_returns_expected() {
        assert_eq!(
            content_brotli_from_content(b"lorem ipsum lorem ipsum lorem ipsum lorem ipsum lorem ipsum").as_deref(),
            Some(b"\x1b\x3a\x00\xf8\x1d\xa9\x53\x9f\xbb\x70\x9d\xc6\xf6\x06\xa7\xda\xe4\x1a\xa4\x6c\xae\x4e\x18\x15\x0b\x98\x56\x70\x03".as_slice())
        );
    }

    /// `ETag` is checked for determinism and sensitivity to content only,
    /// exact value is not pinned.
    #[test]
    fn etag_from_content_returns_expected() {
        // two identical payloads should produce identical `ETag`
        // two different payloads should produce different `ETag`

        assert_eq!(
            etag_from_content(b"lorem ipsum"),
            etag_from_content(b"lorem ipsum")
        );
        assert_ne!(
            etag_from_content(b"lorem ipsum"),
            etag_from_content(b"ipsum lorem")
        );
    }

    /// Content type is guessed from path alone, for relative, absolute and
    /// windows style paths. Text types are expected to carry utf-8 charset.
    #[test_case(
        &PathBuf::from("a.html"),
        "text/html; charset=utf-8";
        "html file"
    )]
    #[test_case(
        &PathBuf::from("directory/styles.css"),
        "text/css; charset=utf-8";
        "css file in directory"
    )]
    #[test_case(
        &PathBuf::from("/root/dir/script.00ff00.js"),
        "text/javascript; charset=utf-8";
        "js file, full path, with some hex in stem"
    )]
    #[test_case(
        &PathBuf::from("C:\\Users\\example\\Images\\SomeImage.webp"),
        "image/webp";
        "webp image in windows style path format"
    )]
    fn content_type_from_path_returns_expected(
        path: &Path,
        expected: &str,
    ) {
        assert_eq!(content_type_from_path(path), expected);
    }
}