warpdrive_proxy/middleware/
static_files.rs

1//! Static file serving middleware
2//!
3//! Serves static files directly from disk without proxying to the upstream application.
4//! This provides massive performance improvements for serving assets like JavaScript, CSS,
5//! images, and other static content.
6//!
7//! # How it works
8//!
9//! 1. Request arrives (e.g., `/assets/application-abc123.js`)
10//! 2. Check if path matches configured static paths (`/assets`, `/packs`, etc.)
11//! 3. If match: resolve to file on disk (`./public/assets/application-abc123.js`)
12//! 4. Validate path security (prevent directory traversal)
13//! 5. (Future) Optional cache lookup (L1 memory, L2 Redis)
14//! 6. Read file from disk (stream large files)
15//! 7. If file not found and fallthrough=true: continue to upstream
16//! 8. If file not found and fallthrough=false: return 404
17//!
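//! # Performance
//!
//! The cache figures below are targets for the planned cache lookup (step 5 above, not
//! implemented yet); until then every request is answered from disk.
//!
//! - Cache hit: <1ms (sub-millisecond from memory)
//! - Cache miss: 1-5ms (read from disk + cache)
//! - vs Rails: 100x faster (no Rails stack, no Ruby)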
23//!
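//! # Security
//!
//! - Directory traversal prevention (the canonicalized path must stay inside the static root)
//! - Hidden file blocking (file names starting with `.` are never served)
//! - Symlink validation (a request that resolves to a symlink is rejected)
//! - File size limits (files above the inline size limit are streamed instead of being
//!   buffered in memory)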
30
31use async_trait::async_trait;
32use bytes::Bytes;
33use pingora::http::ResponseHeader;
34use pingora::prelude::*;
35use std::path::{Path, PathBuf};
36use tokio::fs;
37use tracing::{debug, warn};
38
39use super::{Middleware, MiddlewareContext, StaticResponse, StaticResponseBody};
40use crate::config::Config;
41#[cfg(test)]
42use crate::config::DEFAULT_STATIC_INLINE_SIZE_LIMIT;
43
/// MIME type mapping for common file extensions.
///
/// Each entry is `(extension, content type)`. Extensions are stored lowercase and
/// without the leading dot. Extensions that are not listed here are served as
/// `application/octet-stream` by the lookup code.
const MIME_TYPES: &[(&str, &str)] = &[
    // JavaScript
    ("js", "application/javascript"),
    ("mjs", "application/javascript"),
    // WebAssembly: browsers refuse streaming compilation unless the type is exactly this
    ("wasm", "application/wasm"),
    // CSS
    ("css", "text/css"),
    // HTML
    ("html", "text/html"),
    ("htm", "text/html"),
    // Images
    ("jpg", "image/jpeg"),
    ("jpeg", "image/jpeg"),
    ("png", "image/png"),
    ("gif", "image/gif"),
    ("svg", "image/svg+xml"),
    ("webp", "image/webp"),
    ("avif", "image/avif"),
    ("ico", "image/x-icon"),
    // Fonts
    ("woff", "font/woff"),
    ("woff2", "font/woff2"),
    ("ttf", "font/ttf"),
    ("otf", "font/otf"),
    ("eot", "application/vnd.ms-fontobject"),
    // Audio / video
    ("mp4", "video/mp4"),
    ("webm", "video/webm"),
    // Other
    ("json", "application/json"),
    ("webmanifest", "application/manifest+json"),
    ("xml", "application/xml"),
    ("pdf", "application/pdf"),
    ("txt", "text/plain"),
    ("map", "application/json"), // Source maps
];
75
/// Static file serving middleware.
///
/// Holds the static-file settings copied out of the proxy configuration. All fields
/// are plain owned values, so the middleware can be cloned and printed for diagnostics.
#[derive(Debug, Clone)]
pub struct StaticFilesMiddleware {
    /// Whether static file serving is enabled; when `false` every request is passed through
    enabled: bool,

    /// Root directory on disk that URL paths are resolved against
    root: PathBuf,

    /// URL path prefixes that should be served as static files
    paths: Vec<String>,

    /// Value sent in the `Cache-Control` response header
    cache_control: String,

    /// Whether pre-compressed `.gz` siblings may be served
    gzip_enabled: bool,

    /// Index file names to try, in order, for requests ending in `/`
    index_files: Vec<String>,

    /// If true, continue to backend when file not found; if false, return 404
    fallthrough: bool,

    /// Maximum file size (bytes) to read fully into memory; larger files are streamed
    inline_file_size_limit: u64,
}
102
103impl StaticFilesMiddleware {
104    /// Create new static files middleware from configuration
105    pub fn from_config(config: &Config) -> Self {
106        Self {
107            enabled: config.static_enabled,
108            root: config.static_root.clone(),
109            paths: config.static_paths.clone(),
110            cache_control: config.static_cache_control.clone(),
111            gzip_enabled: config.static_gzip_enabled,
112            index_files: config.static_index_files.clone(),
113            fallthrough: config.static_fallthrough,
114            inline_file_size_limit: config.static_inline_size_limit,
115        }
116    }
117
118    /// Check if the request path matches configured static paths
119    fn matches_static_path(&self, path: &str) -> bool {
120        self.paths.iter().any(|prefix| path.starts_with(prefix))
121    }
122
123    /// Resolve URL path to filesystem path
124    ///
125    /// Example: `/assets/app-123.js` + `./public` → `./public/assets/app-123.js`
126    fn resolve_path(&self, url_path: &str) -> PathBuf {
127        // Remove leading slash for joining
128        let relative_path = url_path.trim_start_matches('/');
129        self.root.join(relative_path)
130    }
131
132    /// Validate file path for security
133    ///
134    /// Prevents:
135    /// - Directory traversal (`../`)
136    /// - Symlinks outside root (optional)
137    /// - Hidden files (`.` prefix)
138    fn validate_path(&self, file_path: &Path) -> bool {
139        // Reject symlinks to avoid exposing files outside the static root
140        if let Ok(metadata) = std::fs::symlink_metadata(file_path) {
141            if metadata.file_type().is_symlink() {
142                warn!("Symlink access blocked: {:?}", file_path);
143                return false;
144            }
145        }
146
147        // Convert to absolute path for validation
148        let file_path = match file_path.canonicalize() {
149            Ok(p) => p,
150            Err(_) => return false, // File doesn't exist or can't be accessed
151        };
152
153        let root = match self.root.canonicalize() {
154            Ok(p) => p,
155            Err(_) => {
156                warn!("Static root directory doesn't exist: {:?}", self.root);
157                return false;
158            }
159        };
160
161        // Ensure file is within root directory
162        if !file_path.starts_with(&root) {
163            warn!("Path traversal attempt blocked: {:?}", file_path);
164            return false;
165        }
166
167        // Check if it's a file (not a directory)
168        if !file_path.is_file() {
169            return false;
170        }
171
172        // Block hidden files (.htaccess, .env, etc.)
173        if let Some(filename) = file_path.file_name() {
174            if filename.to_string_lossy().starts_with('.') {
175                warn!("Hidden file access blocked: {:?}", file_path);
176                return false;
177            }
178        }
179
180        true
181    }
182
183    /// Detect MIME type from file extension
184    fn detect_content_type(path: &Path) -> &'static str {
185        if let Some(ext) = path.extension() {
186            let ext_str = ext.to_string_lossy().to_lowercase();
187            for (extension, mime_type) in MIME_TYPES {
188                if ext_str == *extension {
189                    return mime_type;
190                }
191            }
192        }
193
194        // Default to octet-stream for unknown types
195        "application/octet-stream"
196    }
197
198    /// Generate ETag from file metadata
199    ///
200    /// Format: "{size}-{mtime_nanos}"
201    /// Example: "12345-1609459200000000000"
202    fn generate_etag(metadata: &std::fs::Metadata) -> Option<String> {
203        let size = metadata.len();
204        let mtime = metadata.modified().ok()?;
205        let mtime_nanos = mtime.duration_since(std::time::UNIX_EPOCH).ok()?.as_nanos();
206
207        Some(format!("\"{}-{}\"", size, mtime_nanos))
208    }
209
210    /// Try to serve gzipped version of file if available
211    ///
212    /// Checks for `{file}.gz` and serves if:
213    /// 1. Gzip enabled
214    /// 2. Client accepts gzip (Accept-Encoding: gzip)
215    /// 3. `.gz` file exists
216    fn try_gzip_path(&self, path: &Path) -> Option<PathBuf> {
217        if !self.gzip_enabled {
218            return None;
219        }
220
221        let mut gz_path = path.to_path_buf();
222        let mut filename = gz_path.file_name()?.to_os_string();
223        filename.push(".gz");
224        gz_path.set_file_name(filename);
225
226        if gz_path.exists() && gz_path.is_file() {
227            Some(gz_path)
228        } else {
229            None
230        }
231    }
232
233    /// Serve index file for directory requests
234    ///
235    /// If request is for `/assets/` try `/assets/index.html`, `/assets/index.htm`, etc.
236    async fn try_index_file(&self, dir_path: &Path) -> Option<PathBuf> {
237        for index_file in &self.index_files {
238            let index_path = dir_path.join(index_file);
239            if index_path.exists() && index_path.is_file() {
240                return Some(index_path);
241            }
242        }
243        None
244    }
245}
246
247#[async_trait]
248impl Middleware for StaticFilesMiddleware {
249    /// Check if request matches static path and serve file
250    ///
251    /// This runs BEFORE proxying to upstream, allowing us to intercept
252    /// static file requests and serve them directly.
253    async fn request_filter(
254        &self,
255        session: &mut Session,
256        ctx: &mut MiddlewareContext,
257    ) -> Result<()> {
258        if !self.enabled {
259            return Ok(());
260        }
261
262        let req_path = session.req_header().uri.path();
263
264        // Check if path matches configured static paths
265        if !self.matches_static_path(req_path) {
266            return Ok(());
267        }
268
269        debug!("Static file request: {}", req_path);
270
271        // Resolve to filesystem path
272        let mut file_path = self.resolve_path(req_path);
273
274        // If path ends with `/`, try index files
275        if req_path.ends_with('/') {
276            if let Some(index_path) = self.try_index_file(&file_path).await {
277                file_path = index_path;
278            }
279        }
280
281        // Validate path security
282        if !self.validate_path(&file_path) {
283            if self.fallthrough {
284                // File doesn't exist or invalid - continue to upstream
285                debug!("Static file not found, falling through: {:?}", file_path);
286                return Ok(());
287            } else {
288                // Return 404
289                debug!("Static file not found, returning 404: {:?}", file_path);
290                return Err(Error::explain(ErrorType::HTTPStatus(404), "File not found"));
291            }
292        }
293
294        // Check for gzipped version if client accepts gzip
295        let accept_encoding = session
296            .req_header()
297            .headers
298            .get("accept-encoding")
299            .and_then(|v| v.to_str().ok())
300            .unwrap_or("");
301
302        let (serve_path, is_gzipped) = if accept_encoding.contains("gzip") {
303            if let Some(gz_path) = self.try_gzip_path(&file_path) {
304                if self.validate_path(&gz_path) {
305                    debug!("Serving gzipped version: {:?}", gz_path);
306                    (gz_path, true)
307                } else {
308                    (file_path, false)
309                }
310            } else {
311                (file_path, false)
312            }
313        } else {
314            (file_path, false)
315        };
316
317        // Retrieve metadata once (size, mtime, etc.)
318        let metadata = match fs::metadata(&serve_path).await {
319            Ok(meta) => meta,
320            Err(err) => {
321                warn!("Failed to stat static file {:?}: {}", serve_path, err);
322                if self.fallthrough {
323                    return Ok(());
324                } else {
325                    return Err(Error::explain(ErrorType::HTTPStatus(404), "File not found"));
326                }
327            }
328        };
329
330        let file_len = metadata.len();
331
332        // Generate ETag from metadata
333        let etag = Self::generate_etag(&metadata);
334
335        // Check If-None-Match for 304 Not Modified
336        if let Some(ref etag_value) = etag {
337            if let Some(if_none_match) = session
338                .req_header()
339                .headers
340                .get("if-none-match")
341                .and_then(|v| v.to_str().ok())
342            {
343                if if_none_match == etag_value {
344                    debug!("ETag match, returning 304 Not Modified");
345
346                    // Create 304 response
347                    let mut resp = ResponseHeader::build(304, None)?;
348                    resp.insert_header("ETag", etag_value)?;
349                    resp.insert_header("Cache-Control", &self.cache_control)?;
350
351                    ctx.static_response = Some(StaticResponse {
352                        header: resp,
353                        body: StaticResponseBody::InMemory(Bytes::new()),
354                    });
355                    return Ok(());
356                }
357            }
358        }
359
360        // Build response headers
361        let mut resp = ResponseHeader::build(200, None)?;
362
363        let content_type = Self::detect_content_type(&serve_path);
364        resp.insert_header("Content-Type", content_type)?;
365        resp.insert_header("Content-Length", file_len.to_string())?;
366        resp.insert_header("Cache-Control", &self.cache_control)?;
367
368        if let Some(etag_value) = etag {
369            resp.insert_header("ETag", &etag_value)?;
370        }
371
372        if is_gzipped {
373            resp.insert_header("Content-Encoding", "gzip")?;
374            // Remove .gz from Content-Type determination
375            let original_path = serve_path.with_extension("");
376            let original_content_type = Self::detect_content_type(&original_path);
377            resp.insert_header("Content-Type", original_content_type)?;
378        }
379
380        let body = if file_len <= self.inline_file_size_limit {
381            match fs::read(&serve_path).await {
382                Ok(data) => StaticResponseBody::InMemory(Bytes::from(data)),
383                Err(err) => {
384                    warn!("Failed to read static file {:?}: {}", serve_path, err);
385                    if self.fallthrough {
386                        return Ok(());
387                    } else {
388                        return Err(Error::explain(
389                            ErrorType::HTTPStatus(500),
390                            "Failed to read file",
391                        ));
392                    }
393                }
394            }
395        } else {
396            StaticResponseBody::Stream(serve_path.clone())
397        };
398
399        debug!(
400            "Serving static file: {:?} ({} bytes, {})",
401            serve_path, file_len, content_type
402        );
403
404        // Store response in context to short-circuit proxy
405        ctx.static_response = Some(StaticResponse { header: resp, body });
406
407        Ok(())
408    }
409}
410
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to their table entry; unknown ones fall back to octet-stream.
    #[test]
    fn test_mime_type_detection() {
        let cases = [
            ("app.js", "application/javascript"),
            ("style.css", "text/css"),
            ("image.png", "image/png"),
            ("unknown.xyz", "application/octet-stream"),
        ];

        for (file_name, expected) in cases.iter() {
            let detected = StaticFilesMiddleware::detect_content_type(Path::new(file_name));
            assert_eq!(detected, *expected);
        }
    }

    /// Requests under a configured prefix match; unrelated paths do not.
    #[test]
    fn test_path_matching() {
        let middleware = StaticFilesMiddleware {
            enabled: true,
            root: PathBuf::from("./public"),
            paths: vec!["/assets".to_string(), "/packs".to_string()],
            cache_control: "public, max-age=31536000".to_string(),
            gzip_enabled: true,
            index_files: vec!["index.html".to_string()],
            fallthrough: true,
            inline_file_size_limit: DEFAULT_STATIC_INLINE_SIZE_LIMIT,
        };

        let static_requests = ["/assets/app.js", "/packs/application.css"];
        for request in static_requests.iter() {
            assert!(middleware.matches_static_path(request));
        }

        let is_api_static = middleware.matches_static_path("/api/users");
        assert!(!is_api_static);
    }

    /// A URL path is joined onto the static root with its leading slash removed.
    #[test]
    fn test_path_resolution() {
        let middleware = StaticFilesMiddleware {
            enabled: true,
            root: PathBuf::from("./public"),
            paths: vec!["/assets".to_string()],
            cache_control: String::new(),
            gzip_enabled: false,
            index_files: vec![],
            fallthrough: true,
            inline_file_size_limit: DEFAULT_STATIC_INLINE_SIZE_LIMIT,
        };

        let expected = PathBuf::from("./public/assets/app.js");
        assert_eq!(middleware.resolve_path("/assets/app.js"), expected);
    }
}
469}