add_determinism/add_det/handlers/
gzip.rs

1/* SPDX-License-Identifier: GPL-3.0-or-later */
2
3use anyhow::{bail, Result};
4use log::{debug, info};
5use std::io;
6use std::io::{BufWriter, Read, Write};
7use std::path::Path;
8use std::sync::Arc;
9
10use super::{config, InputOutputHelper};
11
/// The two identification bytes (ID1, ID2) that open every gzip member header.
const GZIP_MAGIC: &[u8] = b"\x1f\x8b";
13
// Implementation based on RFC 1952 (GZIP file format specification):
// https://www.ietf.org/rfc/rfc1952.txt
15
16pub struct Gzip {
17    config: Arc<config::Config>,
18    unix_epoch: Option<u32>,
19}
20
21impl Gzip {
22    pub fn boxed(config: &Arc<config::Config>) -> Box<dyn super::Processor + Send + Sync> {
23        Box::new(Self {
24            config: config.clone(),
25            unix_epoch: None,
26        })
27    }
28}
29
30impl super::Processor for Gzip {
31    fn name(&self) -> &str {
32        "gzip"
33    }
34
35    fn initialize(&mut self) -> Result<()> {
36        if self.config.source_date_epoch.is_none() {
37            bail!("gzip handler requires $SOURCE_DATE_EPOCH to be set");
38        }
39        self.unix_epoch = Some(
40            self.config.source_date_epoch.unwrap().try_into()?
41        );
42
43        Ok(())
44    }
45
46    fn filter(&self, path: &Path) -> Result<bool> {
47        Ok(
48            self.config.ignore_extension ||
49                path.extension().is_some_and(|x| x == "gz")
50        )
51    }
52
53    fn process(&self, input_path: &Path) -> Result<super::ProcessResult> {
54        let (mut io, mut input) = InputOutputHelper::open(input_path, self.config.check, true)?;
55
56        let mut buf = [0; 10];
57        input.read_exact(&mut buf)?;
58
59        if &buf[0..2] != GZIP_MAGIC {
60            return Err(super::Error::BadMagic(2, buf[0..2].to_vec(), GZIP_MAGIC).into());
61        }
62
63        let mtime = &buf[4..8];
64        let mtime = u32::from_le_bytes(mtime.try_into().unwrap());
65        debug!("{}: mtime={} $S_D_E={}", input_path.display(), mtime, self.config.source_date_epoch.unwrap());
66        if mtime <= self.unix_epoch.unwrap() {
67            return io.finalize(false);
68        }
69
70        buf[4..8].copy_from_slice(&self.unix_epoch.unwrap().to_le_bytes());
71        info!("{}: changing content mtime from {} ({}) to {} ({})",
72              input_path.display(),
73              mtime,
74              chrono::DateTime::from_timestamp(mtime as i64, 0).unwrap(),
75              self.unix_epoch.unwrap(),
76              chrono::DateTime::from_timestamp(self.unix_epoch.unwrap() as i64, 0).unwrap());
77
78        io.open_output(false)?;
79        let mut output = BufWriter::new(io.output.as_mut().unwrap().as_file_mut());
80
81        output.write_all(&buf)?;
82
83        // Copy the rest unchanged
84        io::copy(&mut input, &mut output)?;
85
86        output.flush()?;
87        drop(output);
88
89        io.finalize(true)
90    }
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// With the configuration built here, only paths whose final extension
    /// is exactly `gz` pass the filter.
    #[test]
    fn test_filter_gz() {
        let cfg = Arc::new(config::Config::empty(1704106800, false));
        let h = Gzip::boxed(&cfg);

        let cases = [
            ("/some/path/page.gz", true),
            ("/some/path/page.html.gz", true),
            ("/some/path/page.html.agz", false),
            ("/", false),
        ];

        for (path, expected) in cases {
            assert_eq!(
                h.filter(Path::new(path)).unwrap(),
                expected,
                "unexpected filter result for {path}"
            );
        }
    }
}
107}