libcachebust/
processor.rs

1// Copyright (C) 2022  Aravinth Manivannan <realaravinth@batsense.net>
2// SPDX-FileCopyrightText: 2023 Aravinth Manivannan <realaravinth@batsense.net>
3//
4// SPDX-License-Identifier: Apache-2.0
5// SPDX-License-Identifier: MIT
6
//! Module describing the file processor that renames files to set up cache-busting
//!
//! Run the following during build using `build.rs`:
//!
//! ```rust
//! use libcachebust::BusterBuilder;
//!
//! // note: add error checking yourself.
//! //    println!("cargo:rustc-env=GIT_process={}", git_process);
//! let types = vec![
//!     mime::IMAGE_PNG,
//!     mime::IMAGE_SVG,
//!     mime::IMAGE_JPEG,
//!     mime::IMAGE_GIF,
//! ];
//!
//! let config = BusterBuilder::default()
//!     .source("./dist")
//!     .result("./prod")
//!     .mime_types(types)
//!     .follow_links(true)
//!     .build()
//!     .unwrap();
//!
//! config.process().unwrap();
//! ```
//!
//! There's a runtime component to this library which will let you read modified
//! filenames from within your program. See [Files]
36
37use std::collections::HashMap;
38use std::io::Error;
39use std::path::Path;
40use std::{fs, path::PathBuf};
41
42use derive_builder::Builder;
43use serde::{Deserialize, Serialize};
44use walkdir::WalkDir;
45
46use crate::*;
47
/// Items to exclude from hash calculation.
///
/// This is useful when serving vendor static files which are interlinked, where changing
/// file names would mean changing how the vendor files pull in their dependencies --- which
/// is beyond the abilities of `libcachebust`.
///
/// ```rust
/// use libcachebust::NoHashCategory;
///
/// let extensions = NoHashCategory::FileExtentions(vec!["wasm"]);
/// let files = NoHashCategory::FilePaths(vec!["swagger-ui-bundle.js", "favicon-16x16.png"]);
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NoHashCategory<'a> {
    /// File extensions (written without the leading dot, e.g. `"wasm"`) whose files are
    /// copied without a hash in their name
    FileExtentions(Vec<&'a str>),
    /// File paths, relative to the source directory, that are copied without a hash in
    /// their name
    FilePaths(Vec<&'a str>),
}
67
68/// Configuration for setting up cache-busting
69#[derive(Debug, Clone, Builder)]
70#[builder(build_fn(validate = "Self::validate"))]
71pub struct Buster<'a> {
72    /// source directory
73    #[builder(setter(into))]
74    source: String,
75    /// mime_types for hashing
76    #[builder(setter(into, strip_option), default)]
77    mime_types: Option<Vec<mime::Mime>>,
78    /// directory for writing results
79    #[builder(setter(into))]
80    result: String,
81    #[builder(setter(into, strip_option), default)]
82    /// route prefixes
83    prefix: Option<String>,
84    /// follow symlinks?
85    follow_links: bool,
86    /// exclude these files for hashing.
87    /// They will be copied over without including a hash in the filename
88    /// Path should be relative to [self.source]
89    #[builder(default)]
90    no_hash: Vec<NoHashCategory<'a>>,
91}
92
93impl<'a> BusterBuilder<'a> {
94    fn validate(&self) -> Result<(), String> {
95        for no_hash_configs in self.no_hash.iter() {
96            for no_hash in no_hash_configs.iter() {
97                if let NoHashCategory::FilePaths(files) = no_hash {
98                    for file in files.iter() {
99                        if !Path::new(&self.source.as_ref().unwrap())
100                            .join(file)
101                            .exists()
102                        {
103                            return Err(format!("File {} doesn't exist", file));
104                        }
105                    }
106                }
107            }
108        }
109        Ok(())
110    }
111}
112
113impl<'a> Buster<'a> {
114    // creates base_dir to output files to
115    fn init(&self) -> Result<(), Error> {
116        let res = Path::new(&self.result);
117        println!("cargo:rerun-if-changed={}", self.source);
118        if res.exists() {
119            fs::remove_dir_all(&self.result).unwrap();
120        }
121
122        fs::create_dir(&self.result).unwrap();
123        self.create_dir_structure(Path::new(&self.source))?;
124        Ok(())
125    }
126
127    fn hasher(payload: &[u8]) -> String {
128        use data_encoding::HEXUPPER;
129        use sha2::{Digest, Sha256};
130        let mut hasher = Sha256::new();
131        hasher.update(payload);
132        HEXUPPER.encode(&hasher.finalize())
133    }
134
135    /// Processes files.
136    ///
137    /// Panics when a weird MIME is encountered.
138    pub fn process(&self) -> Result<(), Error> {
139        // panics when mimetypes are detected. This way you'll know which files are ignored
140        // from processing
141
142        self.init()?;
143        let mut file_map: Files = Files::new(&self.result);
144
145        let mut process_worker = |path: &Path| {
146            let contents = Self::read_to_string(path).unwrap();
147            let hash = Self::hasher(&contents);
148
149            let get_name = |no_hash: bool| -> String {
150                if no_hash {
151                    format!(
152                        "{}.{}",
153                        path.file_stem().unwrap().to_str().unwrap(),
154                        path.extension().unwrap().to_str().unwrap()
155                    )
156                } else {
157                    format!(
158                        "{}.{}.{}",
159                        path.file_stem().unwrap().to_str().unwrap(),
160                        hash,
161                        path.extension().unwrap().to_str().unwrap()
162                    )
163                }
164            };
165
166            let no_hash_status = self.no_hash.iter().any(|no_hash| {
167                match no_hash {
168                    NoHashCategory::FilePaths(paths) => {
169                        let no_hash_status = paths
170                            .iter()
171                            .any(|file_path| Path::new(&self.source).join(&file_path) == path);
172                        no_hash_status
173                    }
174                    NoHashCategory::FileExtentions(extensions) => {
175                        let mut no_hash_status = false;
176                        if let Some(cur_extention) = path.extension() {
177                            // .unwrap().to_str().unwrap();
178                            if let Some(cur_extention) = cur_extention.to_str() {
179                                no_hash_status = extensions.iter().any(|ext| &cur_extention == ext);
180                            }
181                        }
182                        no_hash_status
183                    }
184                }
185            });
186
187            let new_name = get_name(no_hash_status);
188
189            self.copy(path, &new_name);
190            let (source, destination) = self.gen_map(path, &new_name);
191            let _ = file_map.add(
192                source.to_str().unwrap().into(),
193                destination.to_str().unwrap().into(),
194            );
195        };
196
197        for entry in WalkDir::new(&self.source)
198            .follow_links(self.follow_links)
199            .into_iter()
200        {
201            let entry = entry?;
202
203            let path = entry.path();
204            if !path.is_dir() {
205                let path = Path::new(&path);
206
207                match self.mime_types.as_ref() {
208                    Some(mime_types) => {
209                        for mime_type in mime_types.iter() {
210                            let file_mime =
211                                mime_guess::from_path(path).first().unwrap_or_else(|| {
212                                    panic!("couldn't resolve MIME for file: {:?}", &path)
213                                });
214                            if &file_mime == mime_type {
215                                process_worker(path);
216                            }
217                        }
218                    }
219                    None => process_worker(path),
220                }
221            }
222        }
223
224        file_map.to_env();
225        Ok(())
226    }
227
228    // helper fn to read file to string
229    fn read_to_string(path: &Path) -> Result<Vec<u8>, Error> {
230        use std::fs::File;
231        use std::io::Read;
232
233        let mut file_content = Vec::new();
234        let mut file = File::open(path)?;
235        file.read_to_end(&mut file_content).expect("Unable to read");
236        Ok(file_content)
237    }
238
239    // helper fn to generate filemap
240    fn gen_map<'b>(&self, source: &'b Path, name: &str) -> (&'b Path, PathBuf) {
241        let rel_location = source.strip_prefix(&self.source).unwrap().parent().unwrap();
242        if let Some(prefix) = &self.prefix {
243            //panic!("{}", &prefix);
244            let mut result = self.result.as_str();
245            if result.starts_with('/') {
246                result = &self.result[1..];
247            }
248            let destination = Path::new(prefix)
249                .join(&result)
250                .join(rel_location)
251                .join(name);
252
253            (source, destination)
254        } else {
255            let destination = Path::new(&self.result).join(rel_location).join(name);
256            (source, destination)
257        }
258    }
259
260    // helper fn to copy files
261    fn copy(&self, source: &Path, name: &str) {
262        let rel_location = source.strip_prefix(&self.source).unwrap().parent().unwrap();
263        let destination = Path::new(&self.result).join(rel_location).join(name);
264        fs::copy(source, &destination).unwrap();
265    }
266
267    // helper fn to create directory structure in self.base_dir
268    fn create_dir_structure(&self, path: &Path) -> Result<(), Error> {
269        for entry in WalkDir::new(&path)
270            .follow_links(self.follow_links)
271            .into_iter()
272        {
273            let entry = entry?;
274            let entry_path = entry.path();
275            let entry_path = Path::new(&entry_path);
276
277            if entry_path.is_dir() && path != entry_path {
278                Self::create_dir_structure(self, entry_path)?;
279            } else if entry_path.is_dir() {
280                let rel_location = entry_path.strip_prefix(&self.source).unwrap();
281                let destination = Path::new(&self.result).join(rel_location);
282                if !destination.exists() {
283                    fs::create_dir(destination)?
284                }
285            }
286        }
287        Ok(())
288    }
289}
290/// Filemap struct
291///
292/// maps original names to generated names
293#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
294struct Files {
295    /// filemap<original-path, modified-path>
296    pub map: HashMap<String, String>,
297    base_dir: String,
298}
299
300impl Files {
301    /// Initialize map
302    fn new(base_dir: &str) -> Self {
303        Files {
304            map: HashMap::default(),
305            base_dir: base_dir.into(),
306        }
307    }
308
309    /// Create file map: map original path to modified paths
310    fn add(&mut self, k: String, v: String) -> Result<(), &'static str> {
311        if let std::collections::hash_map::Entry::Vacant(e) = self.map.entry(k) {
312            e.insert(v);
313            Ok(())
314        } else {
315            Err("key exists")
316        }
317    }
318
319    /// This crate uses compile-time environment variables to transfer
320    /// data to the main program. This funtction sets that variable
321    fn to_env(&self) {
322        let json = serde_json::to_string(&self).unwrap();
323        let res = Path::new(CACHE_BUSTER_DATA_FILE);
324        if res.exists() {
325            fs::remove_file(&res).unwrap();
326        }
327        fs::write(CACHE_BUSTER_DATA_FILE, &json).unwrap();
328    }
329
330    #[cfg(test)]
331    /// Load filemap in main program. Should be called from main program
332    fn load() -> Self {
333        let map = fs::read_to_string(CACHE_BUSTER_DATA_FILE).unwrap();
334        let res: Files = serde_json::from_str(&map).unwrap();
335        res
336    }
337}
338
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Removes the file map written by an earlier run; a missing file is ignored.
    pub fn delete_file() {
        let _ = fs::remove_file(&CACHE_BUSTER_DATA_FILE);
    }

    /// Removes the result directory of `config` as well as the file map.
    pub fn cleanup(config: &Buster<'_>) {
        let _ = fs::remove_dir_all(&config.result);
        delete_file();
    }

    /// Image MIME types used by the MIME-filtered scenarios.
    fn image_types() -> Vec<mime::Mime> {
        vec![
            mime::IMAGE_PNG,
            mime::IMAGE_SVG,
            mime::IMAGE_JPEG,
            mime::IMAGE_GIF,
        ]
    }

    /// Asserts that some mapped destination exists and carries the unchanged file name
    /// of `file`, which is also the file name of its source.
    fn assert_copied_unhashed(config: &Buster<'_>, files: &Files, file: &str) {
        let expected = Path::new(&config.result).join(file);
        assert!(files.map.iter().any(|(k, v)| {
            let source = Path::new(&config.source).join(k);
            let dest = Path::new(v);
            dest.file_name() == expected.file_name()
                && dest.exists()
                && source.file_name() == dest.file_name()
        }));
    }

    /// Asserts that every path in `paths` is mapped to an existing destination that
    /// kept the original file name.
    fn assert_paths_unhashed(config: &Buster<'_>, files: &Files, paths: &[&str]) {
        for file in paths {
            assert!(files.map.iter().any(|(k, v)| {
                let source = Path::new(k);
                let dest = Path::new(v);
                let no_hash = Path::new(file);
                source == Path::new(&config.source).join(file)
                    && dest.exists()
                    && no_hash.file_name() == dest.file_name()
            }));
        }
    }

    /// Asserts that each mapped source exists exactly when its destination does.
    fn assert_existence_matches(files: &Files) {
        for (k, v) in &files.map {
            assert_eq!(Path::new(k).exists(), Path::new(v).exists());
        }
    }

    /// Building must fail when a `FilePaths` entry names a file that doesn't exist.
    #[test]
    fn no_hash_validation_works() {
        let no_hash =
            NoHashCategory::FilePaths(vec!["bbell.svg", "eye.svg", "a/b/c/d/s/d/svg/10.svg"]);

        let built = BusterBuilder::default()
            .source("./dist")
            .result("/tmp/prod2i")
            .mime_types(image_types())
            .follow_links(true)
            .prefix("/test")
            .no_hash(vec![no_hash])
            .build();

        assert!(built.is_err())
    }

    /// Without a MIME filter all files are processed; listed paths keep their names.
    ///
    /// Invoked through [`runner`] rather than marked `#[test]`.
    fn no_specific_mime() {
        delete_file();

        const WASM: &str = "858fd6c482cc75111d54.module.wasm";
        let no_hash_files = vec![WASM, "bell.svg", "eye.svg", "a/b/c/d/s/d/svg/10.svg"];

        let config = BusterBuilder::default()
            .source("./dist")
            .result("/tmp/prod2ii")
            .follow_links(true)
            .no_hash(vec![NoHashCategory::FilePaths(no_hash_files.clone())])
            .build()
            .unwrap();
        config.process().unwrap();
        let files = Files::load();

        assert_copied_unhashed(&config, &files, WASM);
        assert_paths_unhashed(&config, &files, &no_hash_files);
        assert_existence_matches(&files);

        cleanup(&config);
    }

    /// With a prefix configured, stripping it from a mapped destination yields the
    /// location of the copied file.
    ///
    /// Invoked through [`runner`] rather than marked `#[test]`.
    fn prefix_works() {
        delete_file();

        let config = BusterBuilder::default()
            .source("./dist")
            .result("/tmp/prod2i")
            .mime_types(image_types())
            .follow_links(true)
            .prefix("/test")
            .build()
            .unwrap();

        config.process().unwrap();
        let files = Files::load();

        if let Some(prefix) = &config.prefix {
            for (k, v) in files.map.iter() {
                let on_disk = Path::new(&v[prefix.len()..]);
                assert_eq!(Path::new(k).exists(), on_disk.exists());
            }
        }

        cleanup(&config);
    }

    /// Files are excluded from hashing both by extension and by explicit path.
    ///
    /// Invoked through [`runner`] rather than marked `#[test]`.
    fn no_hash_extension_works() {
        use std::{thread, time};

        const APPLICATION_WASM: &str = "wasm";
        const WASM: &str = "858fd6c482cc75111d54.module.wasm";

        delete_file();
        thread::sleep(time::Duration::from_secs(4));

        let no_hash_paths = vec!["bell.svg", "eye.svg", "a/b/c/d/s/d/svg/10.svg"];
        let no_hash = vec![
            NoHashCategory::FilePaths(no_hash_paths.clone()),
            NoHashCategory::FileExtentions(vec![APPLICATION_WASM]),
        ];

        let config = BusterBuilder::default()
            .source("./dist")
            .result("/tmp/prodnohashextension")
            .follow_links(true)
            .no_hash(no_hash)
            .build()
            .unwrap();
        config.process().unwrap();
        let files = Files::load();

        // at least one existing destination carries the excluded extension
        assert!(files.map.iter().any(|(_k, v)| {
            let dest = Path::new(v);
            dest.extension().unwrap().to_str().unwrap() == APPLICATION_WASM && dest.exists()
        }));

        assert_copied_unhashed(&config, &files, WASM);
        assert_paths_unhashed(&config, &files, &no_hash_paths);
        assert_existence_matches(&files);

        cleanup(&config);
    }

    /// Runs the scenarios above one after another.
    pub fn runner() {
        prefix_works();
        no_specific_mime();
        no_hash_extension_works();
    }
}