sentinel_proxy/static_files/
mod.rs

1//! Static file serving module for Sentinel proxy
2//!
3//! This module provides high-performance static file serving with:
4//! - Range requests (206 Partial Content) for resumable downloads and video seeking
5//! - Zero-copy file serving using memory-mapped files for large files
6//! - On-the-fly gzip/brotli compression
7//! - In-memory caching for small files
8//! - Directory listing and SPA routing
9//!
10//! # Module Structure
11//!
12//! - [`cache`]: File caching with pre-computed compression
13//! - [`compression`]: Content encoding and compression utilities
14//! - [`range`]: HTTP Range request handling
15
16mod cache;
17mod compression;
18mod range;
19
20pub use cache::{CacheStats, CachedFile, FileCache};
21pub use compression::ContentEncoding;
22
23use anyhow::Result;
24use bytes::Bytes;
25use http::{header, Method, Request, Response, StatusCode};
26use http_body_util::Full;
27use mime_guess::from_path;
28use std::path::{Component, Path, PathBuf};
29use std::sync::Arc;
30use std::time::Instant;
31use tokio::fs;
32use tracing::{debug, trace, warn};
33
34use sentinel_config::StaticFileConfig;
35
36use compression::{compress_content, negotiate_encoding, should_compress};
37use range::serve_range_request;
38
39// ============================================================================
40// Constants
41// ============================================================================
42
/// Minimum file size for compression (1KB) - smaller files have overhead.
///
/// Content encoding is only negotiated when the file is at least this large
/// (`file_size >= MIN_COMPRESS_SIZE`); anything smaller is sent as identity.
const MIN_COMPRESS_SIZE: u64 = 1024;

/// Maximum file size to cache in memory (1MB).
///
/// The comparison is strict (`file_size < MAX_CACHE_FILE_SIZE`), so a file of
/// exactly this size is not cached.
const MAX_CACHE_FILE_SIZE: u64 = 1024 * 1024;

/// File size threshold for the "large file" path (10MB).
///
/// Files at or above this size are routed to `serve_large_file`. Despite the
/// name, that path in this file reads the whole file with `fs::read` and skips
/// compression; no memory mapping is performed in this module.
const MMAP_THRESHOLD: u64 = 10 * 1024 * 1024;
51
52// ============================================================================
53// Static File Server
54// ============================================================================
55
/// Static file server.
///
/// Resolves request paths under the configured root directory and answers
/// GET/HEAD requests with file contents, an index file, a directory listing,
/// or the configured SPA fallback.
pub struct StaticFileServer {
    /// Configuration for static file serving (root, index name, directory
    /// listing flag, compression flag, cache-control value, SPA fallback)
    config: Arc<StaticFileConfig>,
    /// In-memory cache of small files: raw content plus optional gzip and
    /// brotli variants, keyed by resolved file path
    cache: Arc<FileCache>,
}
63
64impl StaticFileServer {
65    /// Create a new static file server
66    pub fn new(config: StaticFileConfig) -> Self {
67        trace!(
68            root = %config.root.display(),
69            index = %config.index,
70            directory_listing = config.directory_listing,
71            compress = config.compress,
72            has_fallback = config.fallback.is_some(),
73            "Creating static file server"
74        );
75
76        let cache = Arc::new(FileCache::with_defaults());
77
78        debug!(
79            root = %config.root.display(),
80            "Static file server initialized"
81        );
82
83        Self {
84            config: Arc::new(config),
85            cache,
86        }
87    }
88
89    /// Get cache statistics
90    pub fn cache_stats(&self) -> CacheStats {
91        let stats = self.cache.stats();
92        trace!(
93            entries = stats.entry_count,
94            total_size = stats.total_size,
95            "Retrieved cache stats"
96        );
97        stats
98    }
99
100    /// Clear the file cache
101    pub fn clear_cache(&self) {
102        let before = self.cache.stats().entry_count;
103        self.cache.clear();
104        debug!(cleared_entries = before, "File cache cleared");
105    }
106
107    /// Serve a static file request
108    pub async fn serve<B>(&self, req: &Request<B>, path: &str) -> Result<Response<Full<Bytes>>> {
109        trace!(
110            method = %req.method(),
111            path = %path,
112            "Serving static file request"
113        );
114
115        // Validate request method
116        match req.method() {
117            &Method::GET | &Method::HEAD => {}
118            _ => {
119                debug!(
120                    method = %req.method(),
121                    path = %path,
122                    "Method not allowed for static file"
123                );
124                return Ok(Response::builder()
125                    .status(StatusCode::METHOD_NOT_ALLOWED)
126                    .header(header::ALLOW, "GET, HEAD")
127                    .body(Full::new(Bytes::new()))?);
128            }
129        }
130
131        // Resolve path securely
132        let file_path = match self.resolve_path(path) {
133            Some(p) => p,
134            None => {
135                debug!(
136                    path = %path,
137                    "Path resolution failed or blocked"
138                );
139                return self.not_found_response();
140            }
141        };
142
143        trace!(
144            request_path = %path,
145            resolved_path = %file_path.display(),
146            "Path resolved"
147        );
148
149        // Check if path is a directory
150        let metadata = match fs::metadata(&file_path).await {
151            Ok(m) => m,
152            Err(e) => {
153                trace!(
154                    path = %file_path.display(),
155                    error = %e,
156                    "File metadata lookup failed"
157                );
158                // File not found - check for SPA fallback
159                if self.config.fallback.is_some() {
160                    if let Some(index_path) = self.find_spa_fallback() {
161                        trace!(
162                            original_path = %path,
163                            fallback_path = %index_path.display(),
164                            "Using SPA fallback"
165                        );
166                        let meta = fs::metadata(&index_path).await?;
167                        return self.serve_file(req, &index_path, meta).await;
168                    }
169                }
170                return self.not_found_response();
171            }
172        };
173
174        if metadata.is_dir() {
175            trace!(
176                path = %file_path.display(),
177                "Path is directory, looking for index"
178            );
179            // Try to serve index file
180            for index_file in &["index.html", "index.htm"] {
181                let index_path = file_path.join(index_file);
182                if let Ok(index_meta) = fs::metadata(&index_path).await {
183                    if index_meta.is_file() {
184                        trace!(
185                            index_file = %index_file,
186                            "Found index file"
187                        );
188                        return self.serve_file(req, &index_path, index_meta).await;
189                    }
190                }
191            }
192
193            // Directory listing if enabled
194            if self.config.directory_listing {
195                trace!(
196                    path = %file_path.display(),
197                    "Generating directory listing"
198                );
199                return self.generate_directory_listing(&file_path).await;
200            }
201
202            debug!(
203                path = %file_path.display(),
204                "Directory listing not allowed"
205            );
206            return Ok(Response::builder()
207                .status(StatusCode::FORBIDDEN)
208                .body(Full::new(Bytes::from_static(
209                    b"Directory listing not allowed",
210                )))?);
211        }
212
213        // Serve the file
214        self.serve_file(req, &file_path, metadata).await
215    }
216
217    /// Resolve path securely
218    fn resolve_path(&self, path: &str) -> Option<PathBuf> {
219        // Remove leading slash and decode URL encoding
220        let path = path.trim_start_matches('/');
221        let decoded = urlencoding::decode(path).ok()?;
222
223        // Build path and validate it doesn't escape root
224        let mut resolved = self.config.root.clone();
225        for component in Path::new(decoded.as_ref()).components() {
226            match component {
227                Component::Normal(c) => resolved.push(c),
228                Component::ParentDir => {
229                    // Reject any path traversal attempts
230                    warn!("Path traversal attempt detected: {}", path);
231                    return None;
232                }
233                Component::CurDir => {}
234                _ => return None,
235            }
236        }
237
238        // Verify path is within root
239        if !resolved.starts_with(&self.config.root) {
240            warn!(
241                "Path escapes root directory: {:?} (root: {:?})",
242                resolved, self.config.root
243            );
244            return None;
245        }
246
247        Some(resolved)
248    }
249
250    /// Find SPA fallback index file
251    fn find_spa_fallback(&self) -> Option<PathBuf> {
252        if let Some(ref fallback) = self.config.fallback {
253            let index_path = self.config.root.join(fallback);
254            if index_path.exists() {
255                return Some(index_path);
256            }
257        }
258        None
259    }
260
261    /// Serve a file
262    async fn serve_file<B>(
263        &self,
264        req: &Request<B>,
265        file_path: &Path,
266        metadata: std::fs::Metadata,
267    ) -> Result<Response<Full<Bytes>>> {
268        let modified = metadata.modified()?;
269        let file_size = metadata.len();
270
271        trace!(
272            path = %file_path.display(),
273            size = file_size,
274            "Serving file"
275        );
276
277        // Generate ETag based on size and modification time
278        let etag = self.generate_etag_from_metadata(file_size, modified);
279
280        // Check conditional headers (If-None-Match, If-Modified-Since)
281        if let Some(response) = self.check_conditional_headers(req, &etag, modified)? {
282            trace!(
283                path = %file_path.display(),
284                status = 304,
285                "Returning 304 Not Modified"
286            );
287            return Ok(response);
288        }
289
290        // Determine content type
291        let content_type = self.get_content_type(file_path);
292
293        // Negotiate content encoding
294        let encoding = if self.config.compress
295            && should_compress(&content_type)
296            && file_size >= MIN_COMPRESS_SIZE
297        {
298            negotiate_encoding(req)
299        } else {
300            ContentEncoding::Identity
301        };
302
303        trace!(
304            path = %file_path.display(),
305            content_type = %content_type,
306            encoding = ?encoding,
307            "Content negotiation complete"
308        );
309
310        // Check for Range header
311        if let Some(range_header) = req.headers().get(header::RANGE) {
312            trace!(
313                path = %file_path.display(),
314                "Processing range request"
315            );
316            return serve_range_request(
317                req,
318                file_path,
319                file_size,
320                &content_type,
321                &etag,
322                modified,
323                range_header,
324                &self.config.cache_control,
325            )
326            .await;
327        }
328
329        // Check cache for small files
330        if file_size < MAX_CACHE_FILE_SIZE {
331            if let Some(cached) = self.cache.get(file_path) {
332                if cached.is_fresh() && cached.size == file_size {
333                    trace!(
334                        path = %file_path.display(),
335                        "Serving from cache"
336                    );
337                    return self.serve_cached(req, cached, encoding);
338                }
339            }
340        }
341
342        // For HEAD requests, return headers only
343        if req.method() == Method::HEAD {
344            trace!(
345                path = %file_path.display(),
346                "Serving HEAD request"
347            );
348            return self.build_head_response(&content_type, file_size, &etag, modified);
349        }
350
351        // Serve the file based on size
352        if file_size >= MMAP_THRESHOLD {
353            // Large file: stream it
354            debug!(
355                path = %file_path.display(),
356                size = file_size,
357                "Serving large file"
358            );
359            self.serve_large_file(
360                file_path,
361                &content_type,
362                file_size,
363                &etag,
364                modified,
365                encoding,
366            )
367            .await
368        } else {
369            // Small/medium file: read into memory
370            trace!(
371                path = %file_path.display(),
372                size = file_size,
373                "Serving small/medium file"
374            );
375            self.serve_small_file(
376                req,
377                file_path,
378                &content_type,
379                file_size,
380                &etag,
381                modified,
382                encoding,
383            )
384            .await
385        }
386    }
387
388    /// Generate ETag from file metadata
389    fn generate_etag_from_metadata(&self, size: u64, modified: std::time::SystemTime) -> String {
390        let modified_ts = modified
391            .duration_since(std::time::UNIX_EPOCH)
392            .unwrap_or_default()
393            .as_secs();
394        format!("\"{:x}-{:x}\"", size, modified_ts)
395    }
396
397    /// Check conditional headers and return 304 if appropriate
398    fn check_conditional_headers<B>(
399        &self,
400        req: &Request<B>,
401        etag: &str,
402        modified: std::time::SystemTime,
403    ) -> Result<Option<Response<Full<Bytes>>>> {
404        // Check If-None-Match (ETag)
405        if let Some(if_none_match) = req.headers().get(header::IF_NONE_MATCH) {
406            if let Ok(if_none_match_str) = if_none_match.to_str() {
407                // Handle multiple ETags separated by commas
408                let matches = if_none_match_str == "*"
409                    || if_none_match_str
410                        .split(',')
411                        .any(|tag| tag.trim().trim_matches('"') == etag.trim_matches('"'));
412
413                if matches {
414                    return Ok(Some(
415                        Response::builder()
416                            .status(StatusCode::NOT_MODIFIED)
417                            .header(header::ETAG, etag)
418                            .body(Full::new(Bytes::new()))?,
419                    ));
420                }
421            }
422        }
423
424        // Check If-Modified-Since
425        if let Some(if_modified) = req.headers().get(header::IF_MODIFIED_SINCE) {
426            if let Ok(if_modified_str) = if_modified.to_str() {
427                if let Ok(if_modified_time) = httpdate::parse_http_date(if_modified_str) {
428                    // Only compare seconds (HTTP dates have second precision)
429                    let modified_secs = modified
430                        .duration_since(std::time::UNIX_EPOCH)
431                        .unwrap_or_default()
432                        .as_secs();
433                    let if_modified_secs = if_modified_time
434                        .duration_since(std::time::UNIX_EPOCH)
435                        .unwrap_or_default()
436                        .as_secs();
437
438                    if modified_secs <= if_modified_secs {
439                        return Ok(Some(
440                            Response::builder()
441                                .status(StatusCode::NOT_MODIFIED)
442                                .header(header::ETAG, etag)
443                                .body(Full::new(Bytes::new()))?,
444                        ));
445                    }
446                }
447            }
448        }
449
450        Ok(None)
451    }
452
453    /// Get content type for a file
454    fn get_content_type(&self, path: &Path) -> String {
455        from_path(path).first_or_octet_stream().as_ref().to_string()
456    }
457
458    /// Serve a small file (read into memory)
459    async fn serve_small_file<B>(
460        &self,
461        _req: &Request<B>,
462        file_path: &Path,
463        content_type: &str,
464        file_size: u64,
465        etag: &str,
466        modified: std::time::SystemTime,
467        encoding: ContentEncoding,
468    ) -> Result<Response<Full<Bytes>>> {
469        let content = fs::read(file_path).await?;
470        let content = Bytes::from(content);
471
472        // Compress if needed
473        let (final_content, content_encoding) = if encoding != ContentEncoding::Identity {
474            match compress_content(&content, encoding) {
475                Ok(compressed) if compressed.len() < content.len() => (compressed, Some(encoding)),
476                _ => (content.clone(), None),
477            }
478        } else {
479            (content.clone(), None)
480        };
481
482        // Cache the file
483        if file_size < MAX_CACHE_FILE_SIZE {
484            let gzip_content = if should_compress(content_type) {
485                compress_content(&content, ContentEncoding::Gzip).ok()
486            } else {
487                None
488            };
489
490            let brotli_content = if should_compress(content_type) {
491                compress_content(&content, ContentEncoding::Brotli).ok()
492            } else {
493                None
494            };
495
496            self.cache.insert(
497                file_path.to_path_buf(),
498                CachedFile {
499                    content: content.clone(),
500                    gzip_content,
501                    brotli_content,
502                    content_type: content_type.to_string(),
503                    etag: etag.to_string(),
504                    last_modified: modified,
505                    cached_at: Instant::now(),
506                    size: file_size,
507                },
508            );
509        }
510
511        let mut response = Response::builder()
512            .status(StatusCode::OK)
513            .header(header::CONTENT_TYPE, content_type)
514            .header(header::CONTENT_LENGTH, final_content.len())
515            .header(header::ACCEPT_RANGES, "bytes")
516            .header(header::ETAG, etag)
517            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
518            .header(header::CACHE_CONTROL, &self.config.cache_control);
519
520        if let Some(enc) = content_encoding {
521            response = response.header(header::CONTENT_ENCODING, enc.as_str());
522            response = response.header(header::VARY, "Accept-Encoding");
523        }
524
525        Ok(response.body(Full::new(final_content))?)
526    }
527
528    /// Serve a large file (streaming)
529    async fn serve_large_file(
530        &self,
531        file_path: &Path,
532        content_type: &str,
533        file_size: u64,
534        etag: &str,
535        modified: std::time::SystemTime,
536        _encoding: ContentEncoding,
537    ) -> Result<Response<Full<Bytes>>> {
538        // For large files, don't compress (streaming compression is complex)
539        // Just read and serve the file
540        let content = fs::read(file_path).await?;
541
542        Ok(Response::builder()
543            .status(StatusCode::OK)
544            .header(header::CONTENT_TYPE, content_type)
545            .header(header::CONTENT_LENGTH, file_size)
546            .header(header::ACCEPT_RANGES, "bytes")
547            .header(header::ETAG, etag)
548            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
549            .header(header::CACHE_CONTROL, &self.config.cache_control)
550            .body(Full::new(Bytes::from(content)))?)
551    }
552
553    /// Serve a cached file
554    fn serve_cached<B>(
555        &self,
556        req: &Request<B>,
557        cached: CachedFile,
558        encoding: ContentEncoding,
559    ) -> Result<Response<Full<Bytes>>> {
560        // Determine best content to serve based on encoding preference
561        let (content, content_encoding) =
562            match (encoding, &cached.brotli_content, &cached.gzip_content) {
563                (ContentEncoding::Brotli, Some(brotli), _) => {
564                    (brotli.clone(), Some(ContentEncoding::Brotli))
565                }
566                (ContentEncoding::Gzip, _, Some(gzip)) => {
567                    (gzip.clone(), Some(ContentEncoding::Gzip))
568                }
569                _ => (cached.content.clone(), None),
570            };
571
572        // For HEAD, return empty body
573        let body = if req.method() == Method::HEAD {
574            Bytes::new()
575        } else {
576            content
577        };
578
579        let mut response = Response::builder()
580            .status(StatusCode::OK)
581            .header(header::CONTENT_TYPE, &cached.content_type)
582            .header(header::CONTENT_LENGTH, body.len())
583            .header(header::ACCEPT_RANGES, "bytes")
584            .header(header::ETAG, &cached.etag)
585            .header(header::CACHE_CONTROL, &self.config.cache_control)
586            .header(
587                header::LAST_MODIFIED,
588                httpdate::fmt_http_date(cached.last_modified),
589            );
590
591        if let Some(enc) = content_encoding {
592            response = response.header(header::CONTENT_ENCODING, enc.as_str());
593            response = response.header(header::VARY, "Accept-Encoding");
594        }
595
596        Ok(response.body(Full::new(body))?)
597    }
598
599    /// Build HEAD response
600    fn build_head_response(
601        &self,
602        content_type: &str,
603        file_size: u64,
604        etag: &str,
605        modified: std::time::SystemTime,
606    ) -> Result<Response<Full<Bytes>>> {
607        Ok(Response::builder()
608            .status(StatusCode::OK)
609            .header(header::CONTENT_TYPE, content_type)
610            .header(header::CONTENT_LENGTH, file_size)
611            .header(header::ACCEPT_RANGES, "bytes")
612            .header(header::ETAG, etag)
613            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
614            .header(header::CACHE_CONTROL, &self.config.cache_control)
615            .body(Full::new(Bytes::new()))?)
616    }
617
618    /// Generate directory listing
619    async fn generate_directory_listing(&self, dir_path: &Path) -> Result<Response<Full<Bytes>>> {
620        let mut entries = fs::read_dir(dir_path).await?;
621        let mut items = Vec::new();
622
623        while let Some(entry) = entries.next_entry().await? {
624            let metadata = entry.metadata().await?;
625            let name = entry.file_name().to_string_lossy().to_string();
626            let is_dir = metadata.is_dir();
627            let size = if is_dir { 0 } else { metadata.len() };
628            let modified = metadata.modified()?;
629
630            items.push((name, is_dir, size, modified));
631        }
632
633        // Sort: directories first, then alphabetically
634        items.sort_by(|a, b| match (a.1, b.1) {
635            (true, false) => std::cmp::Ordering::Less,
636            (false, true) => std::cmp::Ordering::Greater,
637            _ => a.0.cmp(&b.0),
638        });
639
640        let path_display = dir_path
641            .strip_prefix(&self.config.root)
642            .unwrap_or(dir_path)
643            .display();
644
645        let mut html = format!(
646            r#"<!DOCTYPE html>
647<html lang="en">
648<head>
649    <meta charset="UTF-8">
650    <meta name="viewport" content="width=device-width, initial-scale=1.0">
651    <title>Index of /{}</title>
652    <style>
653        body {{ font-family: monospace; margin: 20px; }}
654        h1 {{ font-size: 24px; }}
655        table {{ border-collapse: collapse; }}
656        th, td {{ padding: 8px 15px; text-align: left; }}
657        th {{ background: #f0f0f0; }}
658        tr:hover {{ background: #f8f8f8; }}
659        a {{ text-decoration: none; color: #0066cc; }}
660        a:hover {{ text-decoration: underline; }}
661        .dir {{ font-weight: bold; }}
662        .size {{ text-align: right; }}
663    </style>
664</head>
665<body>
666    <h1>Index of /{}</h1>
667    <table>
668        <tr><th>Name</th><th>Size</th><th>Modified</th></tr>"#,
669            path_display, path_display
670        );
671
672        for (name, is_dir, size, modified) in items {
673            let display_name = if is_dir {
674                format!("{}/", name)
675            } else {
676                name.clone()
677            };
678            let size_str = if is_dir {
679                "-".to_string()
680            } else {
681                format_size(size)
682            };
683            let class = if is_dir { "dir" } else { "" };
684
685            html.push_str(&format!(
686                r#"<tr><td><a href="{}" class="{}">{}</a></td><td class="size">{}</td><td>{}</td></tr>"#,
687                urlencoding::encode(&name),
688                class,
689                html_escape::encode_text(&display_name),
690                size_str,
691                httpdate::fmt_http_date(modified)
692            ));
693        }
694
695        html.push_str("</table></body></html>");
696
697        Ok(Response::builder()
698            .status(StatusCode::OK)
699            .header(header::CONTENT_TYPE, "text/html; charset=utf-8")
700            .body(Full::new(Bytes::from(html)))?)
701    }
702
703    /// Generate 404 Not Found response
704    fn not_found_response(&self) -> Result<Response<Full<Bytes>>> {
705        Ok(Response::builder()
706            .status(StatusCode::NOT_FOUND)
707            .header(header::CONTENT_TYPE, "text/plain")
708            .body(Full::new(Bytes::from_static(b"404 Not Found")))?)
709    }
710}
711
712// ============================================================================
713// Helpers
714// ============================================================================
715
/// Render a byte count as a human-readable size.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values
/// are divided by 1024 per unit step and printed with one decimal place
/// (`"1.5 KB"`). The largest unit is `TB`, so bigger values stay in
/// terabytes.
fn format_size(size: u64) -> String {
    let units: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];

    // Step up one unit at a time, never past the last entry.
    let mut scaled = size as f64;
    let mut steps = 0usize;
    for _ in 1..units.len() {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        steps += 1;
    }

    match steps {
        0 => format!("{} {}", size, units[0]),
        _ => format!("{:.1} {}", scaled, units[steps]),
    }
}
733
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Smoke test: a GET for an existing file under the root yields 200.
    #[tokio::test]
    async fn test_static_file_server() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path().to_path_buf();

        // Populate the temporary root with fixture files.
        let fixtures = [
            ("test.txt", "Hello, World!"),
            ("style.css", "body { color: red; }"),
        ];
        for &(file_name, body) in fixtures.iter() {
            std::fs::write(root.join(file_name), body).unwrap();
        }

        let server = StaticFileServer::new(StaticFileConfig {
            root: root.clone(),
            index: "index.html".to_string(),
            directory_listing: true,
            cache_control: "public, max-age=3600".to_string(),
            compress: true,
            mime_types: std::collections::HashMap::new(),
            fallback: None,
        });

        let request = Request::builder()
            .method(Method::GET)
            .uri("/test.txt")
            .body(())
            .unwrap();

        let outcome = server.serve(&request, "/test.txt").await.unwrap();
        assert_eq!(outcome.status(), StatusCode::OK);
    }

    /// Table of byte counts and their expected rendering.
    #[test]
    fn test_format_size() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (512, "512 B"),
            (1024, "1.0 KB"),
            (1536, "1.5 KB"),
            (1024 * 1024, "1.0 MB"),
            (1024 * 1024 * 1024, "1.0 GB"),
        ];

        for &(bytes, expected) in cases.iter() {
            assert_eq!(format_size(bytes), expected);
        }
    }
}