Skip to main content

static_web_server/
directory_listing_download.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2// This file is part of Static Web Server.
3// See https://static-web-server.net/ for more information
4// Copyright (C) 2019-present Jose Quintana <joseluisq.net>
5
6//! Compress content of a directory into a tarball
7//!
8
9use async_compression::tokio::write::GzipEncoder;
10use async_tar::Builder;
11use bytes::BytesMut;
12use clap::ValueEnum;
13use headers::{ContentType, HeaderMapExt};
14use http::{HeaderValue, Method, Response};
15use hyper::{Body, body::Sender};
16use mime_guess::Mime;
17use std::fmt::Display;
18use std::path::Path;
19use std::path::PathBuf;
20use std::str::FromStr;
21use std::task::Poll::{Pending, Ready};
22use tokio::fs;
23use tokio::io;
24use tokio::io::AsyncWriteExt;
25use tokio_util::compat::TokioAsyncWriteCompatExt;
26
27use crate::Result;
28use crate::handler::RequestHandlerOpts;
29use crate::http_ext::MethodExt;
30
/// Query parameter key used to request downloading a directory as `tar.gz`.
pub const DOWNLOAD_PARAM_KEY: &str = "download";
33
34/// Download format for directory
35#[derive(Debug, Serialize, Deserialize, Clone, ValueEnum, Eq, Hash, PartialEq)]
36#[serde(rename_all = "lowercase")]
37pub enum DirDownloadFmt {
38    /// Gunzip-compressed tarball (.tar.gz)
39    Targz,
40}
41
42impl Display for DirDownloadFmt {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        std::fmt::Debug::fmt(self, f)
45    }
46}
47
/// Directory download options.
///
/// Passed to `archive_reply` to control how the archive response is built.
pub struct DirDownloadOpts<'a> {
    /// Request method. `archive_reply` skips the body for `HEAD` requests.
    pub method: &'a Method,
    /// Prevent following symlinks for files and directories.
    /// `archive_reply` negates this flag to decide whether symlinks are followed.
    pub disable_symlinks: bool,
    /// Ignore hidden files (dotfiles).
    pub ignore_hidden_files: bool,
}
57
58/// Initializes directory listing download
59pub fn init(formats: &Vec<DirDownloadFmt>, handler_opts: &mut RequestHandlerOpts) {
60    for fmt in formats {
61        // Use naive implementation since the list is not expected to be long
62        if !handler_opts.dir_listing_download.contains(fmt) {
63            tracing::info!("directory listing download: enabled format {}", &fmt);
64            handler_opts.dir_listing_download.push(fmt.to_owned());
65        }
66    }
67    tracing::info!(
68        "directory listing download: enabled={}",
69        !handler_opts.dir_listing_download.is_empty()
70    );
71}
72
/// Wrapper around a `hyper` body [`Sender`] that implements
/// [`tokio::io::AsyncWrite`], so writers can stream into a response body.
pub struct ChannelBuffer {
    // Sending half of the body channel that receives the written chunks.
    s: Sender,
}
77
78impl tokio::io::AsyncWrite for ChannelBuffer {
79    fn poll_write(
80        self: std::pin::Pin<&mut Self>,
81        cx: &mut std::task::Context<'_>,
82        buf: &[u8],
83    ) -> std::task::Poll<Result<usize, std::io::Error>> {
84        let this = self.get_mut();
85        let b = BytesMut::from(buf);
86        match this.s.poll_ready(cx) {
87            Ready(r) => match r {
88                Ok(()) => match this.s.try_send_data(b.freeze()) {
89                    Ok(_) => Ready(Ok(buf.len())),
90                    Err(_) => Pending,
91                },
92                Err(e) => Ready(Err(io::Error::new(io::ErrorKind::BrokenPipe, e))),
93            },
94            Pending => Pending,
95        }
96    }
97
98    fn poll_flush(
99        self: std::pin::Pin<&mut Self>,
100        _cx: &mut std::task::Context<'_>,
101    ) -> std::task::Poll<Result<(), std::io::Error>> {
102        std::task::Poll::Ready(Ok(()))
103    }
104
105    fn poll_shutdown(
106        self: std::pin::Pin<&mut Self>,
107        _cx: &mut std::task::Context<'_>,
108    ) -> std::task::Poll<Result<(), std::io::Error>> {
109        std::task::Poll::Ready(Ok(()))
110    }
111}
112
113async fn archive(
114    path: PathBuf,
115    src_path: PathBuf,
116    cb: ChannelBuffer,
117    follow_symlinks: bool,
118    ignore_hidden: bool,
119) -> Result {
120    let gz = GzipEncoder::with_quality(cb, async_compression::Level::Default);
121    let mut a = Builder::new(gz.compat_write());
122    a.follow_symlinks(follow_symlinks);
123
124    // NOTE: Since it is not possible to handle error gracefully, we will
125    // just stop writing when error occurs. It is also not possible to call
126    // sender.abort() as it is protected behind the Builder to ensure
127    // finish() is successfully called.
128
129    // adapted from async_tar::Builder::append_dir_all
130    let mut stack = vec![(src_path.to_path_buf(), true, false)];
131    while let Some((src, is_dir, is_symlink)) = stack.pop() {
132        let dest = path.join(src.strip_prefix(&src_path)?);
133
134        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
135        if is_dir || (is_symlink && follow_symlinks && src.is_dir()) {
136            let mut entries = fs::read_dir(&src).await?;
137            while let Some(entry) = entries.next_entry().await? {
138                // Check and ignore the current hidden file/directory (dotfile) if feature enabled
139                let name = entry.file_name();
140                if ignore_hidden && name.as_encoded_bytes().first().is_some_and(|c| *c == b'.') {
141                    continue;
142                }
143
144                let file_type = entry.file_type().await?;
145                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
146            }
147            if dest != Path::new("") {
148                a.append_dir(&dest, &src).await?;
149            }
150        } else {
151            // use append_path_with_name to handle symlink
152            a.append_path_with_name(src, &dest).await?;
153        }
154    }
155
156    a.finish().await?;
157    // this is required to emit gzip CRC trailer
158    a.into_inner().await?.into_inner().shutdown().await?;
159
160    Ok(())
161}
162
163/// Reply with archived directory content in compressed tarball format.
164/// The content from `src_path` on server filesystem will be stored to `path`
165/// within the tarball.
166/// An async task will be spawned to asynchronously write compressed data to the
167/// response body.
168pub fn archive_reply<P, Q>(path: P, src_path: Q, opts: DirDownloadOpts<'_>) -> Response<Body>
169where
170    P: AsRef<Path>,
171    Q: AsRef<Path>,
172{
173    let archive_name = path.as_ref().with_extension("tar.gz");
174    let mut resp = Response::new(Body::empty());
175
176    resp.headers_mut().typed_insert(ContentType::from(
177        Mime::from_str("application/gzip").unwrap_or(mime_guess::mime::APPLICATION_OCTET_STREAM),
178    ));
179
180    // A safe `Content-Disposition` value that combines an
181    // ASCII-safe quoted-string `filename=...` (for legacy user agents) and
182    // an RFC 5987 `filename*=UTF-8''<percent-encoded>` (for modern UAs).
183    let archive_name_str = archive_name.to_string_lossy();
184    let ascii_safe = sanitize_filename_for_quoted_string(&archive_name_str);
185    let percent_encoded = rfc5987_encode_filename(&archive_name_str);
186    let hvals =
187        format!("attachment; filename=\"{ascii_safe}\"; filename*=UTF-8''{percent_encoded}");
188    match HeaderValue::from_str(hvals.as_str()) {
189        Ok(hval) => {
190            resp.headers_mut()
191                .insert(hyper::header::CONTENT_DISPOSITION, hval);
192        }
193        Err(err) => {
194            // not fatal, most browser is able to handle the download since
195            // content-type is set
196            tracing::error!("can't make content disposition from {}: {:?}", hvals, err);
197        }
198    }
199
200    // We skip the body for HEAD requests
201    if opts.method.is_head() {
202        return resp;
203    }
204
205    let (tx, body) = Body::channel();
206    tokio::task::spawn(archive(
207        path.as_ref().into(),
208        src_path.as_ref().into(),
209        ChannelBuffer { s: tx },
210        !opts.disable_symlinks,
211        opts.ignore_hidden_files,
212    ));
213    *resp.body_mut() = body;
214
215    resp
216}
217
/// Make a filename safe for an HTTP `Content-Disposition`
/// `filename="..."` quoted-string.
///
/// Every character is kept only if it is printable ASCII (space through
/// `~`) and is neither `"` nor `\`; anything else (quoted-string breakers,
/// ASCII control characters including DEL, and every non-ASCII character)
/// is replaced by a single `_`. An empty result becomes `download`.
///
/// The lossy ASCII name is meant to be paired with the RFC 5987
/// `filename*=` variant produced by `rfc5987_encode_filename`.
#[doc(hidden)]
pub fn sanitize_filename_for_quoted_string(name: &str) -> String {
    let cleaned: String = name
        .chars()
        .map(|ch| {
            let printable_ascii = matches!(ch, ' '..='~');
            if printable_ascii && ch != '"' && ch != '\\' {
                ch
            } else {
                '_'
            }
        })
        .collect();

    if cleaned.is_empty() {
        String::from("download")
    } else {
        cleaned
    }
}
240
/// Percent-encode a filename per RFC 5987 (the `attr-char` production
/// from RFC 8187), for use as the `filename*=UTF-8''<value>` parameter.
///
/// Bytes in the `attr-char` set (ASCII letters, digits and
/// `! # $ & + - . ^ _ ` | ~`) are copied verbatim; every other byte of the
/// UTF-8 input is written as `%HH` with uppercase hex digits.
#[doc(hidden)]
pub fn rfc5987_encode_filename(name: &str) -> String {
    use std::fmt::Write;

    name.bytes()
        .fold(String::with_capacity(name.len()), |mut encoded, byte| {
            match byte {
                b'A'..=b'Z'
                | b'a'..=b'z'
                | b'0'..=b'9'
                | b'!'
                | b'#'
                | b'$'
                | b'&'
                | b'+'
                | b'-'
                | b'.'
                | b'^'
                | b'_'
                | b'`'
                | b'|'
                | b'~' => encoded.push(char::from(byte)),
                other => {
                    // Writing into a `String` cannot fail.
                    let _ = write!(encoded, "%{other:02X}");
                }
            }
            encoded
        })
}
267
#[cfg(test)]
mod tests {
    use super::{rfc5987_encode_filename, sanitize_filename_for_quoted_string};

    /// SECURITY: A directory name containing `"` or `\` must NOT break out
    /// of the `Content-Disposition` quoted-string framing. Both characters
    /// are replaced by `_`; the rest of the name is kept.
    #[test]
    fn sanitize_strips_quote_and_backslash() {
        assert_eq!(
            sanitize_filename_for_quoted_string("evil\".tar.gz"),
            "evil_.tar.gz"
        );
        assert_eq!(
            sanitize_filename_for_quoted_string("a\\b.tar.gz"),
            "a_b.tar.gz"
        );
    }

    /// SECURITY: Control bytes (CR/LF/TAB/NUL/DEL) must not survive into a
    /// header value — they could be reflected if a downstream proxy
    /// mishandles `Content-Disposition`.
    #[test]
    fn sanitize_strips_control_bytes() {
        let out = sanitize_filename_for_quoted_string("a\r\nb\tc\x00d\x7fe");
        for ch in out.chars() {
            assert!(
                ch as u32 >= 0x20 && ch != '\x7f',
                "control byte leaked: {:?}",
                ch
            );
        }
        // Each control byte maps to exactly one `_`.
        assert_eq!(out, "a__b_c_d_e");
    }

    /// Each non-ASCII character is replaced by a single `_` in the
    /// quoted-string variant (browsers fall back to the
    /// `filename*=UTF-8''...` parameter for these).
    #[test]
    fn sanitize_replaces_non_ascii() {
        let out = sanitize_filename_for_quoted_string("rep\u{00f6}rt.tar.gz");
        assert!(out.is_ascii());
        assert_eq!(out, "rep_rt.tar.gz");
    }

    /// Printable ASCII other than `"` and `\` passes through unchanged.
    #[test]
    fn sanitize_keeps_printable_ascii() {
        assert_eq!(
            sanitize_filename_for_quoted_string("my file (1)~.tar.gz"),
            "my file (1)~.tar.gz"
        );
    }

    #[test]
    fn sanitize_never_empty() {
        assert_eq!(sanitize_filename_for_quoted_string(""), "download");
    }

    /// RFC 5987 / RFC 8187 attr-char alphabet must round-trip unchanged.
    #[test]
    fn rfc5987_preserves_attr_char_alphabet() {
        let input = "abcXYZ0189!#$&+-.^_`|~";
        assert_eq!(rfc5987_encode_filename(input), input);
    }

    /// Everything outside attr-char must be percent-encoded — in
    /// particular, `"`, `\`, space, `%`, CR, LF, and any non-ASCII byte.
    #[test]
    fn rfc5987_encodes_unsafe_bytes() {
        assert_eq!(rfc5987_encode_filename("a b"), "a%20b");
        assert_eq!(rfc5987_encode_filename("a\"b"), "a%22b");
        assert_eq!(rfc5987_encode_filename("a\\b"), "a%5Cb");
        assert_eq!(rfc5987_encode_filename("a%b"), "a%25b");
        assert_eq!(rfc5987_encode_filename("a\r\nb"), "a%0D%0Ab");
        // UTF-8 `\u{00f6}` = 0xC3 0xB6
        assert_eq!(rfc5987_encode_filename("\u{00f6}"), "%C3%B6");
    }

    /// An empty name encodes to an empty value.
    #[test]
    fn rfc5987_empty_input() {
        assert_eq!(rfc5987_encode_filename(""), "");
    }
}