static-web-server 2.43.0

A cross-platform, high-performance and asynchronous web server for static files-serving.
Documentation
// SPDX-License-Identifier: MIT OR Apache-2.0
// This file is part of Static Web Server.
// See https://static-web-server.net/ for more information
// Copyright (C) 2019-present Jose Quintana <joseluisq.net>

//! Provides in-memory file caching with support for expiration policies
//! such as Time to live (TTL) and Time to idle (TTI).
//!
//! Admission to the cache is controlled by the Least Frequently Used (LFU) policy
//! and eviction from the cache is controlled by the Least Recently Used (LRU) policy.
//!

use bytes::Bytes;
use compact_str::CompactString;
use headers::{AcceptRanges, ContentLength, ContentRange, HeaderMap, HeaderMapExt, LastModified};
use hyper::header::{CONTENT_TYPE, HeaderValue};
use hyper::{Body, Response, StatusCode};
use mini_moka::sync::Cache;
use std::path::Path;
use std::sync::{Arc, OnceLock};
use std::time::Duration;
use tokio::sync::{Semaphore, SemaphorePermit};

use crate::Result;
use crate::conditional_headers::{ConditionalBody, ConditionalHeaders};
use crate::handler::RequestHandlerOpts;
use crate::response::{BadRangeError, bytes_range};

/// Global cache that stores all files in memory.
/// It provides expiration policies like Time to live (TTL) and Time to idle (TTI) support.
///
/// Entries are keyed by the file path as a string and hold a shared
/// (`Arc`) [`MemFile`]. The store is set once by `init`; until then
/// `CACHE_STORE.get()` yields `None`.
pub(crate) static CACHE_STORE: OnceLock<Cache<CompactString, Arc<MemFile>>> = OnceLock::new();

/// Semaphore holding a single permit, used by `get_or_acquire` to serialize
/// cache misses so that only one caller at a time proceeds to read a file.
///
/// The permit is global rather than per-file, so misses for different
/// files also wait on each other.
static CACHE_PERMIT: Semaphore = Semaphore::const_new(1);

/// It defines the in-memory files cache options.
pub struct MemCacheOpts {
    /// The maximum size per file in bytes.
    pub max_file_size: u64,
}

impl MemCacheOpts {
    /// Creates a new instance of `MemCacheOpts`.
    ///
    /// The given `max_file_size` is scaled by `1024 * 1024` and the result is
    /// stored in [`MemCacheOpts::max_file_size`] as a byte count.
    ///
    /// The multiplication saturates at `u64::MAX` instead of overflowing, so
    /// an excessively large configured value can neither panic (debug builds)
    /// nor silently wrap around to a tiny limit (release builds).
    ///
    /// NOTE(review): `init` passes a default of `8192` that its comment labels
    /// as "8mb"; with this multiplier `8192` becomes 8 GiB. Confirm the
    /// intended unit of the configured value.
    #[inline]
    pub fn new(max_file_size: u64) -> Self {
        /// Factor applied to the configured value to obtain bytes.
        const SCALE: u64 = 1024 * 1024;

        Self {
            max_file_size: max_file_size.saturating_mul(SCALE),
        }
    }
}

/// Initializes the global in-memory cache store when the feature is configured.
///
/// When `handler_opts.advanced_opts` carries a `memory_cache` section, a cache
/// is built from it (falling back to defaults for unset values), stored in
/// [`CACHE_STORE`], and the resulting [`MemCacheOpts`] is saved into
/// `handler_opts.memory_cache`. Otherwise the function only logs that the
/// cache is disabled.
///
/// # Errors
///
/// Fails if [`CACHE_STORE`] has already been set.
pub(crate) fn init(handler_opts: &mut RequestHandlerOpts) -> Result {
    let configured = handler_opts
        .advanced_opts
        .as_ref()
        .and_then(|advanced| advanced.memory_cache.as_ref());

    // Nothing configured: report it and leave the handler options untouched.
    let Some(opts) = configured else {
        tracing::info!("in-memory cache (experimental): enabled=false");
        return Ok(());
    };

    // TODO: define maximum values

    // Maximum capacity of the store (default: 256).
    let capacity = opts.capacity.unwrap_or(256);
    // Time to live in seconds (default: 3600, i.e. one hour).
    let ttl = opts.ttl.unwrap_or(3600);
    // Time to idle in seconds (default: 300, i.e. five minutes).
    let tti = opts.tti.unwrap_or(300);
    // Per-file size limit (default: 8192); scaled by `MemCacheOpts::new`.
    let max_file_size = opts.max_file_size.unwrap_or(8192);

    tracing::info!(
        "in-memory cache (experimental): enabled=true, capacity={capacity}, ttl={ttl}, tti={tti}, max_file_size={max_file_size}"
    );

    let mem_opts = MemCacheOpts::new(max_file_size);

    let cache = Cache::builder()
        .max_capacity(capacity)
        .time_to_live(Duration::from_secs(ttl))
        .time_to_idle(Duration::from_secs(tti))
        .build();

    // The store can only be set once; a second initialization is an error.
    if CACHE_STORE.set(cache).is_err() {
        bail!("unable to initialize the in-memory cache store")
    }

    handler_opts.memory_cache = Some(mem_opts);

    Ok(())
}

/// Result of a cache lookup via `get_or_acquire`.
///
/// The lifetime `'a` is that of the semaphore the permit in
/// [`CacheResult::Miss`] was acquired from.
pub(crate) enum CacheResult<'a> {
    /// Cache hit: carries the outcome of building the response from the
    /// cached file, to be returned directly.
    Hit(Result<Response<Body>, StatusCode>),
    /// Cache miss: the caller should read the file. The semaphore permit is held
    /// so that concurrent misses are serialized (single-flight). Drop the permit
    /// after inserting into the cache store.
    Miss(SemaphorePermit<'a>),
    /// An error occurred acquiring the semaphore; carries the status code
    /// to respond with.
    Error(StatusCode),
}

/// Try to get the file in a form of a response from the cache store by a path or
/// acquires a permit to ensure to hold until the file is read first (once).
///
/// If the file is not found in the cache store then
/// a cache permit is acquired internally (one at a time)
/// to allow the caller to read the file first.
/// Once the file is read on caller's side then the permit is dropped.
pub(crate) async fn get_or_acquire(
    file_path: &Path,
    headers_opt: &HeaderMap,
) -> Option<CacheResult<'static>> {
    let file_path_str = file_path.to_str().or(None)?;

    let store = CACHE_STORE.get().unwrap();
    match store.get::<CompactString>(&file_path_str.into()) {
        Some(mem_file) => {
            tracing::debug!(
                "file `{}` found in the in-memory cache store, returning it directly",
                file_path_str
            );
            Some(CacheResult::Hit(mem_file.response_body(headers_opt)))
        }
        _ => {
            tracing::debug!(
                "file `{}` was not found in the in-memory cache store, continuing",
                file_path_str
            );
            // If a file is not found in the store then continue
            // with the normal flow and wait on first file read.
            // Hold the permit so concurrent misses are serialized.
            match CACHE_PERMIT.acquire().await {
                Ok(permit) => {
                    // Re-check after acquiring — another request may have populated
                    // the cache while we were waiting for the permit.
                    if let Some(mem_file) = store.get::<CompactString>(&file_path_str.into()) {
                        return Some(CacheResult::Hit(mem_file.response_body(headers_opt)));
                    }
                    Some(CacheResult::Miss(permit))
                }
                Err(err) => {
                    tracing::error!("error trying to acquire permit on first read: {:?}", err);
                    Some(CacheResult::Error(StatusCode::INTERNAL_SERVER_ERROR))
                }
            }
        }
    }
}

/// Metadata describing a file that is a candidate for the in-memory cache.
///
/// NOTE(review): presumably carried along while the file is being read so
/// the entry can be inserted into the cache afterwards; confirm against
/// the callers.
#[derive(Debug, Clone)]
pub(crate) struct MemFileTempOpts {
    /// Path of the file as a string.
    pub(crate) file_path: String,
    /// Pre-built `Content-Type` `HeaderValue`. Reusing a `HeaderValue`
    /// (instead of `ContentType`) avoids re-encoding the mime string
    /// when the entry is eventually inserted into the cache.
    pub(crate) content_type: HeaderValue,
    /// `Last-Modified` header value of the file, if available.
    pub(crate) last_modified: Option<LastModified>,
}

impl MemFileTempOpts {
    /// Creates a new `MemFileTempOpts` from its parts; the values are stored
    /// as given.
    pub(crate) fn new(
        file_path: String,
        content_type: HeaderValue,
        last_modified: Option<LastModified>,
    ) -> Self {
        Self {
            file_path,
            content_type,
            last_modified,
        }
    }
}

/// In-memory file representation to be stored in the cache.
///
/// Holds the full file body as a [`Bytes`] (shared, reference-counted, zero-copy
/// cloneable) and a pre-built `Content-Type` [`HeaderValue`] so that serving a
/// cached response avoids any per-request allocation or string conversion.
#[derive(Debug)]
pub(crate) struct MemFile {
    /// Bytes of the current file. Its length is used as the total length
    /// when building responses.
    data: Bytes,
    /// Pre-built `Content-Type` header value. Stored as a [`HeaderValue`]
    /// (rather than `ContentType`) so that emitting it on a cache hit is a
    /// cheap reference-counted clone.
    content_type: HeaderValue,
    /// `Last-Modified` header for the current file. When present it is used
    /// for the conditional-request check and emitted on responses.
    last_modified: Option<LastModified>,
}

impl MemFile {
    #[inline]
    pub(crate) fn new(
        data: Bytes,
        content_type: HeaderValue,
        last_modified: Option<LastModified>,
    ) -> Self {
        Self {
            data,
            content_type,
            last_modified,
        }
    }

    /// Build a response for a cache hit.
    ///
    /// The body is constructed directly from the in-memory [`Bytes`] (a single
    /// reference-counted clone for full responses, an O(1) `Bytes::slice` for
    /// range responses). No allocation or copying of the file contents occurs
    /// on the hot path; the response body is a single data frame, not a
    /// chunked stream.
    pub(crate) fn response_body(&self, headers: &HeaderMap) -> Result<Response<Body>, StatusCode> {
        let conditionals = ConditionalHeaders::new(headers);
        let modified = self.last_modified;

        match conditionals.check(modified) {
            ConditionalBody::NoBody(resp) => Ok(resp),
            ConditionalBody::WithBody(range) => {
                let total_len = self.data.len() as u64;

                bytes_range(range, total_len)
                    .map(|(start, end)| {
                        let sub_len = end - start;
                        let is_partial = sub_len != total_len;

                        // Zero-copy body: for a full response we clone the
                        // `Bytes` (refcount bump); for a range we use
                        // `Bytes::slice` (O(1), shared buffer).
                        let body_bytes = if is_partial {
                            self.data.slice(start as usize..end as usize)
                        } else {
                            self.data.clone()
                        };
                        let mut resp = Response::new(Body::from(body_bytes));

                        if is_partial {
                            *resp.status_mut() = StatusCode::PARTIAL_CONTENT;
                            match ContentRange::bytes(start..end, total_len) {
                                Ok(range) => {
                                    resp.headers_mut().typed_insert(range);
                                }
                                Err(err) => {
                                    tracing::error!("invalid content range error: {:?}", err);
                                    let mut resp = Response::new(Body::empty());
                                    *resp.status_mut() = StatusCode::RANGE_NOT_SATISFIABLE;
                                    resp.headers_mut()
                                        .typed_insert(ContentRange::unsatisfied_bytes(total_len));
                                    return Ok(resp);
                                }
                            }
                        }

                        let h = resp.headers_mut();
                        h.typed_insert(ContentLength(sub_len));
                        // Cheap refcount clone of the pre-built header value
                        // (avoids re-stringifying the mime type per request).
                        h.insert(CONTENT_TYPE, self.content_type.clone());
                        h.typed_insert(AcceptRanges::bytes());

                        if let Some(last_modified) = modified {
                            h.typed_insert(last_modified);
                        }

                        Ok(resp)
                    })
                    .unwrap_or_else(|BadRangeError| {
                        let mut resp = Response::new(Body::empty());
                        *resp.status_mut() = StatusCode::RANGE_NOT_SATISFIABLE;
                        resp.headers_mut()
                            .typed_insert(ContentRange::unsatisfied_bytes(total_len));
                        Ok(resp)
                    })
            }
        }
    }
}