Skip to main content

static_web_server/
static_files.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2// This file is part of Static Web Server.
3// See https://static-web-server.net/ for more information
4// Copyright (C) 2019-present Jose Quintana <joseluisq.net>
5
6//! The static file module which powers the web server.
7//!
8
9// Part of the file is borrowed and adapted at a convenience from
10// https://github.com/seanmonstar/warp/blob/master/src/filters/fs.rs
11
12use headers::{AcceptRanges, HeaderMap, HeaderMapExt, HeaderValue};
13use hyper::{Body, Method, Response, StatusCode, header::CONTENT_ENCODING, header::CONTENT_LENGTH};
14use std::cell::RefCell;
15use std::collections::HashSet;
16use std::fs::{File, Metadata};
17use std::io;
18use std::path::{Path, PathBuf};
19
20use crate::Result;
21use crate::conditional_headers::ConditionalHeaders;
22use crate::fs::meta::{FileMetadata, try_file_open, try_metadata, try_metadata_with_html_suffix};
23use crate::fs::path::{PathExt, sanitize_path};
24use crate::http_ext::{HTTP_SUPPORTED_METHODS, MethodExt};
25use crate::response::response_body;
26
27#[cfg(feature = "experimental")]
28use crate::mem_cache::{cache, cache::MemCacheOpts};
29
30use crate::compression_static;
31
32#[cfg(feature = "directory-listing")]
33use crate::{
34    directory_listing,
35    directory_listing::{DirListFmt, DirListOpts},
36};
37
38#[cfg(feature = "directory-listing-download")]
39use crate::directory_listing_download::{
40    DOWNLOAD_PARAM_KEY, DirDownloadFmt, DirDownloadOpts, archive_reply,
41};
42
/// Index file names tried for a directory request when the caller supplies
/// an empty `index_files` list.
const DEFAULT_INDEX_FILES: &[&str; 1] = &["index.html"];
44
/// Upper bound on the number of "contained" verdicts remembered by each
/// worker thread.
///
/// The bound is chosen for static-file workloads whose set of distinct
/// request paths is small. Once the bound is hit the whole set is emptied,
/// so the following requests go through `canonicalize` again.
const CONTAINMENT_CACHE_CAP: usize = 1024;

thread_local! {
    /// Thread-local set of `probe` paths already shown to resolve inside
    /// the canonical base directory.
    ///
    /// Motivation: profiling identified the containment check, and the
    /// `Path::canonicalize` syscall behind it, as the largest CPU cost on
    /// the static-file fast path. When the same documents are served over
    /// and over, lookups in this set replace nearly all of those syscalls.
    /// Keys are `PathBuf`s, so a lookup costs one hash plus a byte
    /// comparison.
    ///
    /// Validity: a path is inserted only after the slow path has proven it
    /// is contained within `base_path`, and entries are never invalidated
    /// when the filesystem changes. For a static-file server the accepted
    /// worst case is a stale "OK" verdict after an administrator replaces a
    /// directory with a symlink, which requires filesystem changes made
    /// outside SWS.
    ///
    /// NOTE(review): the key is the probe path alone; the base path it was
    /// verified against is not part of the key. If one process can serve
    /// several roots where one is nested inside another, a verdict recorded
    /// for the outer root would also be honoured for the inner one —
    /// confirm whether that configuration is possible.
    static CONTAINMENT_CACHE: RefCell<HashSet<PathBuf>> =
        RefCell::new(HashSet::with_capacity(64));
}

/// Remembers `probe` as verified-safe in the current thread's containment
/// cache.
///
/// No per-entry LRU bookkeeping is done: if the set already holds
/// `CONTAINMENT_CACHE_CAP` entries it is emptied before `probe` is stored,
/// on the expectation that the working set fits comfortably under the cap.
#[inline]
fn cache_safe_probe(probe: &Path) {
    CONTAINMENT_CACHE.with_borrow_mut(|verified| {
        let is_full = verified.len() >= CONTAINMENT_CACHE_CAP;
        if is_full {
            verified.clear();
        }
        verified.insert(probe.to_path_buf());
    });
}
86
87/// Defines all options needed by the static-files handler.
88pub struct HandleOpts<'a> {
89    /// Request method.
90    pub method: &'a Method,
91    /// In-memory files cache feature (experimental).
92    #[cfg(feature = "experimental")]
93    pub memory_cache: Option<&'a MemCacheOpts>,
94    /// Request headers.
95    pub headers: &'a HeaderMap<HeaderValue>,
96    /// Request base path.
97    pub base_path: &'a PathBuf,
98    /// Request base path.
99    pub uri_path: &'a str,
100    /// Index files.
101    pub index_files: &'a [&'a str],
102    /// Request URI query.
103    pub uri_query: Option<&'a str>,
104    /// Directory listing feature.
105    #[cfg(feature = "directory-listing")]
106    #[cfg_attr(docsrs, doc(cfg(feature = "directory-listing")))]
107    pub dir_listing: bool,
108    /// Directory listing order feature.
109    #[cfg(feature = "directory-listing")]
110    #[cfg_attr(docsrs, doc(cfg(feature = "directory-listing")))]
111    pub dir_listing_order: u8,
112    /// Directory listing format feature.
113    #[cfg(feature = "directory-listing")]
114    #[cfg_attr(docsrs, doc(cfg(feature = "directory-listing")))]
115    pub dir_listing_format: &'a DirListFmt,
116    /// Directory listing download feature.
117    #[cfg(feature = "directory-listing-download")]
118    #[cfg_attr(docsrs, doc(cfg(feature = "directory-listing-download")))]
119    pub dir_listing_download: &'a [DirDownloadFmt],
120    /// Redirect trailing slash feature.
121    pub redirect_trailing_slash: bool,
122    /// Compression static feature.
123    pub compression_static: bool,
124    /// Ignore hidden files feature.
125    pub ignore_hidden_files: bool,
126    /// Prevent following symlinks for files and directories.
127    pub disable_symlinks: bool,
128}
129
130/// Static file response type with additional data.
131pub struct StaticFileResponse {
132    /// Inner HTTP response.
133    pub resp: Response<Body>,
134    /// The file path of the inner HTTP response.
135    pub file_path: PathBuf,
136}
137
138/// The server entry point to handle incoming requests which map to specific files
139/// on file system and return a file response.
140pub async fn handle(opts: &HandleOpts<'_>) -> Result<StaticFileResponse, StatusCode> {
141    let method = opts.method;
142    // Check if current HTTP method for incoming request is supported
143    if !method.is_allowed() {
144        return Err(StatusCode::METHOD_NOT_ALLOWED);
145    }
146
147    let uri_path = opts.uri_path;
148    let mut file_path = sanitize_path(opts.base_path, uri_path)?;
149
150    let headers_opt = opts.headers;
151
152    // In-memory file cache feature with eviction policy
153    #[cfg(feature = "experimental")]
154    if opts.memory_cache.is_some() {
155        // NOTE: we only support a default auto index for directory requests
156        // when working on a memory-cache context.
157        if opts.redirect_trailing_slash && uri_path.ends_with('/') {
158            file_path.push("index.html");
159        }
160
161        if let Some(result) = cache::get_or_acquire(file_path.as_path(), headers_opt).await {
162            match result {
163                cache::CacheResult::Hit(result) => {
164                    return Ok(StaticFileResponse {
165                        resp: result?,
166                        file_path,
167                    });
168                }
169                cache::CacheResult::Error(status) => {
170                    return Err(status);
171                }
172                cache::CacheResult::Miss(_permit) => {
173                    // Permit is held while we proceed to read the file below.
174                    // It will be dropped at the end of this scope, after the
175                    // MemCacheFileStream inserts the data into the cache store.
176                }
177            }
178        }
179    }
180
181    let FileMetadata {
182        file_path,
183        metadata,
184        is_dir,
185        precompressed_variant,
186        file: pre_opened,
187    } = get_composed_file_metadata(
188        &mut file_path,
189        headers_opt,
190        opts.compression_static,
191        opts.index_files,
192    )?;
193
194    let mut file_path_temp = file_path.clone();
195    if is_dir {
196        file_path_temp.pop();
197    }
198
199    let file_path_relative = file_path_temp.strip_prefix(opts.base_path).map_err(|err| {
200        tracing::error!(
201            "unable to strip prefix from file path '{}': {}",
202            file_path.display(),
203            err,
204        );
205        StatusCode::NOT_FOUND
206    })?;
207
208    let file_path_resolved =
209        match CONTAINMENT_CACHE.with(|c| c.borrow().contains(file_path_temp.as_path())) {
210            true => file_path_temp.clone(),
211            false => {
212                let resolved = file_path_temp.canonicalize().map_err(|err| {
213                    tracing::error!(
214                        "unable to resolve '{}' symlink path: {}",
215                        file_path_temp.display(),
216                        err,
217                    );
218                    StatusCode::NOT_FOUND
219                })?;
220
221                // a. Fast path: when `base_path` is already canonical (the
222                // production case), the resolved file path will share its
223                // prefix and we avoid a per-request `canonicalize` syscall on
224                // the base directory.
225                if resolved.starts_with(opts.base_path) {
226                    cache_safe_probe(file_path_temp.as_path());
227                    resolved
228                } else {
229                    // b. Fallback: canonicalize the base and retry the check.
230                    let base_path = opts.base_path.canonicalize().map_err(|err| {
231                        tracing::error!(
232                            "unable to resolve '{}' base path: {}",
233                            opts.base_path.display(),
234                            err,
235                        );
236                        StatusCode::NOT_FOUND
237                    })?;
238                    if !resolved.starts_with(&base_path) {
239                        tracing::error!(
240                            "file path '{}' resolves outside of the base path, access denied",
241                            resolved.display()
242                        );
243                        return Err(StatusCode::NOT_FOUND);
244                    }
245                    cache_safe_probe(file_path_temp.as_path());
246                    resolved
247                }
248            }
249        };
250    // Silence unused warning when fast path is hit on subsequent requests.
251    let _ = &file_path_resolved;
252
253    if opts.disable_symlinks {
254        // Check if the whole path or any path component contains a symlink.
255        // Note that this could be expensive as it requires filesystem access for each path component.
256        let has_symlink = file_path_relative
257            .contains_symlink(opts.base_path)
258            .map_err(|err| {
259                tracing::error!(
260                    "unable to check if file path '{}' contains symlink: {}",
261                    file_path_relative.display(),
262                    err,
263                );
264                StatusCode::NOT_FOUND
265            })?;
266
267        if has_symlink {
268            tracing::warn!(
269                "file path '{}' contains a symlink, access denied",
270                file_path.display()
271            );
272            return Err(StatusCode::FORBIDDEN);
273        }
274    }
275
276    // Check for a hidden file/directory (dotfile) and ignore it if feature enabled
277    if opts.ignore_hidden_files && file_path_relative.is_hidden() {
278        tracing::trace!(
279            "considering hidden file {} as not found",
280            file_path.display()
281        );
282        return Err(StatusCode::NOT_FOUND);
283    }
284
285    let resp_file_path = file_path.to_owned();
286
287    // Check for a trailing slash on the current directory path
288    // and redirect if that path doesn't end with the slash char
289    if is_dir && opts.redirect_trailing_slash && !uri_path.ends_with('/') {
290        let query = opts.uri_query.map_or(String::new(), |s| ["?", s].concat());
291        let uri = [uri_path, "/", query.as_str()].concat();
292        let loc = match HeaderValue::from_str(uri.as_str()) {
293            Ok(val) => val,
294            Err(err) => {
295                tracing::error!("invalid header value from current uri: {:?}", err);
296                return Err(StatusCode::INTERNAL_SERVER_ERROR);
297            }
298        };
299
300        let mut resp = Response::new(Body::empty());
301        resp.headers_mut().insert(hyper::header::LOCATION, loc);
302        *resp.status_mut() = StatusCode::PERMANENT_REDIRECT;
303
304        tracing::trace!("uri doesn't end with a slash so redirecting permanently");
305        return Ok(StaticFileResponse {
306            resp,
307            file_path: resp_file_path,
308        });
309    }
310
311    // Respond with the permitted communication methods
312    if method.is_options() {
313        let mut resp = Response::new(Body::empty());
314        *resp.status_mut() = StatusCode::NO_CONTENT;
315        resp.headers_mut()
316            .typed_insert(headers::Allow::from_iter(HTTP_SUPPORTED_METHODS.clone()));
317        resp.headers_mut().typed_insert(AcceptRanges::bytes());
318
319        return Ok(StaticFileResponse {
320            resp,
321            file_path: resp_file_path,
322        });
323    }
324
325    // Directory listing
326    // Check if "directory listing" feature is enabled,
327    // if current path is a valid directory and
328    // if it does not contain an `index.html` file (if a proper auto index is generated)
329    #[cfg(feature = "directory-listing")]
330    if is_dir && opts.dir_listing && !file_path.exists() {
331        // Directory listing download
332        // Check if "directory listing download" feature is enabled,
333        // if current path is a valid directory and
334        // if query string has parameter "download" set
335        #[cfg(feature = "directory-listing-download")]
336        if !opts.dir_listing_download.is_empty()
337            && let Some((_k, _dl_archive_opt)) =
338                form_urlencoded::parse(opts.uri_query.unwrap_or("").as_bytes())
339                    .find(|(k, _v)| k == DOWNLOAD_PARAM_KEY)
340        {
341            // file path is index.html, need pop
342            let mut fp = file_path.clone();
343            fp.pop();
344            if let Some(filename) = fp.file_name() {
345                let resp = archive_reply(
346                    filename,
347                    &fp,
348                    DirDownloadOpts {
349                        method,
350                        disable_symlinks: opts.disable_symlinks,
351                        ignore_hidden_files: opts.ignore_hidden_files,
352                    },
353                );
354                return Ok(StaticFileResponse {
355                    resp,
356                    file_path: resp_file_path,
357                });
358            } else {
359                tracing::error!("Unable to get filename from {}", fp.to_string_lossy());
360                return Err(StatusCode::INTERNAL_SERVER_ERROR);
361            }
362        }
363
364        let resp = directory_listing::auto_index(DirListOpts {
365            root_path: opts.base_path.as_path(),
366            method,
367            current_path: uri_path,
368            uri_query: opts.uri_query,
369            filepath: file_path,
370            dir_listing_order: opts.dir_listing_order,
371            dir_listing_format: opts.dir_listing_format,
372            ignore_hidden_files: opts.ignore_hidden_files,
373            disable_symlinks: opts.disable_symlinks,
374            #[cfg(feature = "directory-listing-download")]
375            dir_listing_download: opts.dir_listing_download,
376        })?;
377
378        return Ok(StaticFileResponse {
379            resp,
380            file_path: resp_file_path,
381        });
382    }
383
384    // Check for a pre-compressed file variant if present under the `opts.compression_static` context
385    if let Some(precompressed_meta) = precompressed_variant {
386        let (precomp_path, precomp_encoding) = precompressed_meta;
387        // Pre-opened handle (if any) refers to the original file we are
388        // about to replace with the precompressed variant; just drop it.
389        drop(pre_opened);
390        let mut resp = file_reply(
391            headers_opt,
392            file_path,
393            &metadata,
394            Some(precomp_path),
395            None,
396            #[cfg(feature = "experimental")]
397            opts.memory_cache,
398        )?;
399
400        // Prepare corresponding headers to let know how to decode the payload
401        resp.headers_mut().remove(CONTENT_LENGTH);
402        let encoding = match HeaderValue::from_str(precomp_encoding.as_str()) {
403            Ok(val) => val,
404            Err(err) => {
405                tracing::error!(
406                    "unable to parse header value from content encoding: {:?}",
407                    err
408                );
409                return Err(StatusCode::INTERNAL_SERVER_ERROR);
410            }
411        };
412        resp.headers_mut().insert(CONTENT_ENCODING, encoding);
413
414        return Ok(StaticFileResponse {
415            resp,
416            file_path: resp_file_path,
417        });
418    }
419
420    #[cfg(feature = "experimental")]
421    let resp = file_reply(
422        headers_opt,
423        file_path,
424        &metadata,
425        None,
426        pre_opened,
427        opts.memory_cache,
428    )?;
429
430    #[cfg(not(feature = "experimental"))]
431    let resp = file_reply(headers_opt, file_path, &metadata, None, pre_opened)?;
432
433    Ok(StaticFileResponse {
434        resp,
435        file_path: resp_file_path,
436    })
437}
438
439/// Returns the final composed metadata containing
440/// the current `file_path` with its file metadata
441/// as well as its optional pre-compressed variant.
442fn get_composed_file_metadata<'a>(
443    mut file_path: &'a mut PathBuf,
444    headers: &'a HeaderMap<HeaderValue>,
445    compression_static: bool,
446    mut index_files: &'a [&'a str],
447) -> Result<FileMetadata<'a>, StatusCode> {
448    tracing::trace!("getting metadata for file {}", file_path.display());
449
450    // Try to find the file path on the file system
451    match try_metadata(file_path) {
452        Ok((mut metadata, is_dir)) => {
453            // The optional pre-opened file for `file_path`. When `Some`, the
454            // response pipeline reuses this handle instead of issuing an
455            // extra `open(2)` syscall. We only populate it when the index
456            // file is resolved via `try_file_open` below.
457            let mut opened_file = None;
458            // Whether the resolved `file_path` points to an existing file.
459            // For non-directory requests this is always true.
460            // For directory requests it becomes true only when an index file (or its
461            // `.html` suffix sibling) was successfully resolved. Used to
462            // gate the pre-compressed variant probe so we never issue
463            // `stat(2)` for `.br`/`.gz`/`.zst` siblings of a non-existent
464            // index (see issue #617).
465            let mut resolved_exists = !is_dir;
466            if is_dir {
467                // Try every index file variant in order
468                if index_files.is_empty() {
469                    index_files = DEFAULT_INDEX_FILES;
470                }
471                for index in index_files {
472                    // Append a HTML index page by default if it's a directory path (`autoindex`)
473                    tracing::debug!("dir: appending {} to the directory path", index);
474                    file_path.push(index);
475
476                    // Try to open the appended index file directly.
477                    // `try_file_open` performs a single `open(2)` + `fstat(2)`
478                    // instead of `stat(2)` followed by `open(2)` later in
479                    // `file_reply`, saving one path-resolving syscall on the
480                    // hot path.
481                    if let Ok((file, meta)) = try_file_open(file_path) {
482                        metadata = meta;
483                        opened_file = Some(file);
484                        resolved_exists = true;
485                        break;
486                    }
487
488                    // We remove only the appended index file
489                    file_path.pop();
490                    let new_meta: Option<Metadata>;
491                    (file_path, new_meta) = try_metadata_with_html_suffix(file_path);
492                    if let Some(new_meta) = new_meta {
493                        metadata = new_meta;
494                        resolved_exists = true;
495                        break;
496                    }
497                }
498
499                // In case no index was found then we append the last index
500                // of the list to preserve the previous behavior
501                if !resolved_exists && !index_files.is_empty() {
502                    file_path.push(index_files.last().unwrap());
503                }
504            }
505
506            // Only probe for pre-compressed siblings when the resolved file
507            // actually exists. Probing for `.br`/`.gz`/`.zst` of a path that
508            // was never confirmed on disk wastes one `stat(2)` per
509            // configured encoding on the request hot path
510            // (see issue #617).
511            let precompressed_variant = (compression_static && resolved_exists)
512                .then(|| compression_static::precompressed_variant(file_path, headers))
513                .flatten()
514                .map(|p| (p.file_path, p.encoding));
515
516            // If we are going to serve a precompressed variant, the
517            // pre-opened file points to the *original* file which won't be
518            // streamed; drop it so `file_reply` opens the precomp file.
519            if precompressed_variant.is_some() {
520                opened_file = None;
521            }
522
523            Ok(FileMetadata {
524                file_path,
525                metadata,
526                is_dir,
527                precompressed_variant,
528                file: opened_file,
529            })
530        }
531        Err(err) => {
532            // If the file path doesn't exist, then try the `.html`-suffixed path
533            // first. For example: `/posts/article` falls back to
534            // `/posts/article.html`.
535            //
536            // We intentionally do *not* probe for pre-compressed siblings
537            // of the original (non-existent) path. Doing so would waste
538            // one `stat(2)` per configured encoding for every truly
539            // missing path (see issue #617).
540            let new_meta: Option<Metadata>;
541            (file_path, new_meta) = try_metadata_with_html_suffix(file_path);
542
543            let Some(new_meta) = new_meta else {
544                // Neither the original path nor its `.html` sibling exists.
545                // Return the original error without probing for compressed
546                // variants of non-existent files.
547                return Err(err);
548            };
549
550            // The `.html` sibling exists. Only now is it worth probing for
551            // its pre-compressed sibling (`/article.html.br`, etc.).
552            let precompressed_variant = compression_static
553                .then(|| compression_static::precompressed_variant(file_path, headers))
554                .flatten()
555                .map(|p| (p.file_path, p.encoding));
556
557            Ok(FileMetadata {
558                file_path,
559                metadata: new_meta,
560                is_dir: false,
561                precompressed_variant,
562                file: None,
563            })
564        }
565    }
566}
567
568/// Reply with the corresponding file content taking into account
569/// its precompressed variant if any.
570/// The `path` param should contains always the original requested file path and
571/// the `meta` param value should corresponds to it.
572/// However, if `path_precompressed` contains some value then
573/// the `meta` param  value will belong to the `path_precompressed` (precompressed file variant).
574fn file_reply<'a>(
575    headers: &'a HeaderMap<HeaderValue>,
576    path: &'a PathBuf,
577    meta: &'a Metadata,
578    path_precompressed: Option<PathBuf>,
579    pre_opened: Option<File>,
580    #[cfg(feature = "experimental")] memory_cache: Option<&'a MemCacheOpts>,
581) -> Result<Response<Body>, StatusCode> {
582    let conditionals = ConditionalHeaders::new(headers);
583
584    // Reuse the pre-opened handle when serving the original file. For
585    // precompressed variants the open target differs, so we open the
586    // precomp file ourselves (and the caller dropped `pre_opened`).
587    let file_result = match (path_precompressed.as_deref(), pre_opened) {
588        (None, Some(file)) => Ok(file),
589        (Some(precomp_path), _) => File::open(precomp_path),
590        (None, None) => File::open(path),
591    };
592
593    match file_result {
594        Ok(file) => {
595            #[cfg(feature = "experimental")]
596            let resp = response_body(file, path, meta, conditionals, memory_cache);
597
598            #[cfg(not(feature = "experimental"))]
599            let resp = response_body(file, path, meta, conditionals);
600
601            resp
602        }
603        Err(err) => {
604            let status = match err.kind() {
605                io::ErrorKind::NotFound => {
606                    tracing::debug!("file can't be opened or not found: {:?}", path.display());
607                    StatusCode::NOT_FOUND
608                }
609                io::ErrorKind::PermissionDenied => {
610                    tracing::warn!("file permission denied: {:?}", path.display());
611                    StatusCode::FORBIDDEN
612                }
613                _ => {
614                    tracing::error!("file open error (path={:?}): {} ", path.display(), err);
615                    StatusCode::INTERNAL_SERVER_ERROR
616                }
617            };
618            Err(status)
619        }
620    }
621}