sentinel_proxy/static_files/
mod.rs

1//! Static file serving module for Sentinel proxy
2//!
3//! This module provides high-performance static file serving with:
4//! - Range requests (206 Partial Content) for resumable downloads and video seeking
5//! - Zero-copy file serving using memory-mapped files for large files
6//! - On-the-fly gzip/brotli compression
7//! - In-memory caching for small files
8//! - Directory listing and SPA routing
9//!
10//! # Module Structure
11//!
12//! - [`cache`]: File caching with pre-computed compression
13//! - [`compression`]: Content encoding and compression utilities
14//! - [`range`]: HTTP Range request handling
15
16mod cache;
17mod compression;
18mod range;
19
20pub use cache::{CachedFile, CacheStats, FileCache};
21pub use compression::ContentEncoding;
22
23use anyhow::Result;
24use bytes::Bytes;
25use http::{header, Method, Request, Response, StatusCode};
26use http_body_util::Full;
27use mime_guess::from_path;
28use std::path::{Component, Path, PathBuf};
29use std::sync::Arc;
30use std::time::Instant;
31use tokio::fs;
32use tracing::{debug, trace, warn};
33
34use sentinel_config::StaticFileConfig;
35
36use compression::{compress_content, negotiate_encoding, should_compress};
37use range::serve_range_request;
38
39// ============================================================================
40// Constants
41// ============================================================================
42
/// Minimum file size for compression (1KB) - smaller files have overhead.
///
/// Files smaller than this are always served with identity encoding.
const MIN_COMPRESS_SIZE: u64 = 1024;

/// Maximum file size to cache in memory (1MB).
///
/// The bound is exclusive: only files strictly smaller than this are looked
/// up in, or inserted into, the in-memory cache.
const MAX_CACHE_FILE_SIZE: u64 = 1024 * 1024;

/// File size threshold for memory-mapped serving (10MB).
///
/// Files at or above this size are routed to `serve_large_file`.
/// NOTE(review): no memory mapping is visible in this module; the large-file
/// path currently reads the whole file into memory - confirm the intended
/// behavior.
const MMAP_THRESHOLD: u64 = 10 * 1024 * 1024;
51
52// ============================================================================
53// Static File Server
54// ============================================================================
55
/// Static file server
///
/// Resolves request paths below the configured root directory and answers
/// `GET`/`HEAD` requests with file contents, index files, directory listings
/// or an SPA fallback, depending on configuration.
pub struct StaticFileServer {
    /// Configuration for static file serving
    config: Arc<StaticFileConfig>,
    /// In-memory cache of small files (content, pre-compressed variants and
    /// the metadata needed to build response headers)
    cache: Arc<FileCache>,
}
63
64impl StaticFileServer {
65    /// Create a new static file server
66    pub fn new(config: StaticFileConfig) -> Self {
67        trace!(
68            root = %config.root.display(),
69            index = %config.index,
70            directory_listing = config.directory_listing,
71            compress = config.compress,
72            has_fallback = config.fallback.is_some(),
73            "Creating static file server"
74        );
75
76        let cache = Arc::new(FileCache::with_defaults());
77
78        debug!(
79            root = %config.root.display(),
80            "Static file server initialized"
81        );
82
83        Self {
84            config: Arc::new(config),
85            cache,
86        }
87    }
88
89    /// Get cache statistics
90    pub fn cache_stats(&self) -> CacheStats {
91        let stats = self.cache.stats();
92        trace!(
93            entries = stats.entry_count,
94            total_size = stats.total_size,
95            "Retrieved cache stats"
96        );
97        stats
98    }
99
100    /// Clear the file cache
101    pub fn clear_cache(&self) {
102        let before = self.cache.stats().entry_count;
103        self.cache.clear();
104        debug!(
105            cleared_entries = before,
106            "File cache cleared"
107        );
108    }
109
110    /// Serve a static file request
111    pub async fn serve<B>(&self, req: &Request<B>, path: &str) -> Result<Response<Full<Bytes>>> {
112        trace!(
113            method = %req.method(),
114            path = %path,
115            "Serving static file request"
116        );
117
118        // Validate request method
119        match req.method() {
120            &Method::GET | &Method::HEAD => {}
121            _ => {
122                debug!(
123                    method = %req.method(),
124                    path = %path,
125                    "Method not allowed for static file"
126                );
127                return Ok(Response::builder()
128                    .status(StatusCode::METHOD_NOT_ALLOWED)
129                    .header(header::ALLOW, "GET, HEAD")
130                    .body(Full::new(Bytes::new()))?);
131            }
132        }
133
134        // Resolve path securely
135        let file_path = match self.resolve_path(path) {
136            Some(p) => p,
137            None => {
138                debug!(
139                    path = %path,
140                    "Path resolution failed or blocked"
141                );
142                return self.not_found_response();
143            }
144        };
145
146        trace!(
147            request_path = %path,
148            resolved_path = %file_path.display(),
149            "Path resolved"
150        );
151
152        // Check if path is a directory
153        let metadata = match fs::metadata(&file_path).await {
154            Ok(m) => m,
155            Err(e) => {
156                trace!(
157                    path = %file_path.display(),
158                    error = %e,
159                    "File metadata lookup failed"
160                );
161                // File not found - check for SPA fallback
162                if self.config.fallback.is_some() {
163                    if let Some(index_path) = self.find_spa_fallback() {
164                        trace!(
165                            original_path = %path,
166                            fallback_path = %index_path.display(),
167                            "Using SPA fallback"
168                        );
169                        let meta = fs::metadata(&index_path).await?;
170                        return self.serve_file(req, &index_path, meta).await;
171                    }
172                }
173                return self.not_found_response();
174            }
175        };
176
177        if metadata.is_dir() {
178            trace!(
179                path = %file_path.display(),
180                "Path is directory, looking for index"
181            );
182            // Try to serve index file
183            for index_file in &["index.html", "index.htm"] {
184                let index_path = file_path.join(index_file);
185                if let Ok(index_meta) = fs::metadata(&index_path).await {
186                    if index_meta.is_file() {
187                        trace!(
188                            index_file = %index_file,
189                            "Found index file"
190                        );
191                        return self.serve_file(req, &index_path, index_meta).await;
192                    }
193                }
194            }
195
196            // Directory listing if enabled
197            if self.config.directory_listing {
198                trace!(
199                    path = %file_path.display(),
200                    "Generating directory listing"
201                );
202                return self.generate_directory_listing(&file_path).await;
203            }
204
205            debug!(
206                path = %file_path.display(),
207                "Directory listing not allowed"
208            );
209            return Ok(Response::builder()
210                .status(StatusCode::FORBIDDEN)
211                .body(Full::new(Bytes::from_static(b"Directory listing not allowed")))?);
212        }
213
214        // Serve the file
215        self.serve_file(req, &file_path, metadata).await
216    }
217
218    /// Resolve path securely
219    fn resolve_path(&self, path: &str) -> Option<PathBuf> {
220        // Remove leading slash and decode URL encoding
221        let path = path.trim_start_matches('/');
222        let decoded = urlencoding::decode(path).ok()?;
223
224        // Build path and validate it doesn't escape root
225        let mut resolved = self.config.root.clone();
226        for component in Path::new(decoded.as_ref()).components() {
227            match component {
228                Component::Normal(c) => resolved.push(c),
229                Component::ParentDir => {
230                    // Reject any path traversal attempts
231                    warn!("Path traversal attempt detected: {}", path);
232                    return None;
233                }
234                Component::CurDir => {}
235                _ => return None,
236            }
237        }
238
239        // Verify path is within root
240        if !resolved.starts_with(&self.config.root) {
241            warn!(
242                "Path escapes root directory: {:?} (root: {:?})",
243                resolved, self.config.root
244            );
245            return None;
246        }
247
248        Some(resolved)
249    }
250
251    /// Find SPA fallback index file
252    fn find_spa_fallback(&self) -> Option<PathBuf> {
253        if let Some(ref fallback) = self.config.fallback {
254            let index_path = self.config.root.join(fallback);
255            if index_path.exists() {
256                return Some(index_path);
257            }
258        }
259        None
260    }
261
262    /// Serve a file
263    async fn serve_file<B>(
264        &self,
265        req: &Request<B>,
266        file_path: &Path,
267        metadata: std::fs::Metadata,
268    ) -> Result<Response<Full<Bytes>>> {
269        let modified = metadata.modified()?;
270        let file_size = metadata.len();
271
272        trace!(
273            path = %file_path.display(),
274            size = file_size,
275            "Serving file"
276        );
277
278        // Generate ETag based on size and modification time
279        let etag = self.generate_etag_from_metadata(file_size, modified);
280
281        // Check conditional headers (If-None-Match, If-Modified-Since)
282        if let Some(response) = self.check_conditional_headers(req, &etag, modified)? {
283            trace!(
284                path = %file_path.display(),
285                status = 304,
286                "Returning 304 Not Modified"
287            );
288            return Ok(response);
289        }
290
291        // Determine content type
292        let content_type = self.get_content_type(file_path);
293
294        // Negotiate content encoding
295        let encoding = if self.config.compress
296            && should_compress(&content_type)
297            && file_size >= MIN_COMPRESS_SIZE
298        {
299            negotiate_encoding(req)
300        } else {
301            ContentEncoding::Identity
302        };
303
304        trace!(
305            path = %file_path.display(),
306            content_type = %content_type,
307            encoding = ?encoding,
308            "Content negotiation complete"
309        );
310
311        // Check for Range header
312        if let Some(range_header) = req.headers().get(header::RANGE) {
313            trace!(
314                path = %file_path.display(),
315                "Processing range request"
316            );
317            return serve_range_request(
318                req,
319                file_path,
320                file_size,
321                &content_type,
322                &etag,
323                modified,
324                range_header,
325                &self.config.cache_control,
326            )
327            .await;
328        }
329
330        // Check cache for small files
331        if file_size < MAX_CACHE_FILE_SIZE {
332            if let Some(cached) = self.cache.get(file_path) {
333                if cached.is_fresh() && cached.size == file_size {
334                    trace!(
335                        path = %file_path.display(),
336                        "Serving from cache"
337                    );
338                    return self.serve_cached(req, cached, encoding);
339                }
340            }
341        }
342
343        // For HEAD requests, return headers only
344        if req.method() == Method::HEAD {
345            trace!(
346                path = %file_path.display(),
347                "Serving HEAD request"
348            );
349            return self.build_head_response(&content_type, file_size, &etag, modified);
350        }
351
352        // Serve the file based on size
353        if file_size >= MMAP_THRESHOLD {
354            // Large file: stream it
355            debug!(
356                path = %file_path.display(),
357                size = file_size,
358                "Serving large file"
359            );
360            self.serve_large_file(file_path, &content_type, file_size, &etag, modified, encoding)
361                .await
362        } else {
363            // Small/medium file: read into memory
364            trace!(
365                path = %file_path.display(),
366                size = file_size,
367                "Serving small/medium file"
368            );
369            self.serve_small_file(req, file_path, &content_type, file_size, &etag, modified, encoding)
370                .await
371        }
372    }
373
374    /// Generate ETag from file metadata
375    fn generate_etag_from_metadata(&self, size: u64, modified: std::time::SystemTime) -> String {
376        let modified_ts = modified
377            .duration_since(std::time::UNIX_EPOCH)
378            .unwrap_or_default()
379            .as_secs();
380        format!("\"{:x}-{:x}\"", size, modified_ts)
381    }
382
383    /// Check conditional headers and return 304 if appropriate
384    fn check_conditional_headers<B>(
385        &self,
386        req: &Request<B>,
387        etag: &str,
388        modified: std::time::SystemTime,
389    ) -> Result<Option<Response<Full<Bytes>>>> {
390        // Check If-None-Match (ETag)
391        if let Some(if_none_match) = req.headers().get(header::IF_NONE_MATCH) {
392            if let Ok(if_none_match_str) = if_none_match.to_str() {
393                // Handle multiple ETags separated by commas
394                let matches = if_none_match_str == "*"
395                    || if_none_match_str
396                        .split(',')
397                        .any(|tag| tag.trim().trim_matches('"') == etag.trim_matches('"'));
398
399                if matches {
400                    return Ok(Some(
401                        Response::builder()
402                            .status(StatusCode::NOT_MODIFIED)
403                            .header(header::ETAG, etag)
404                            .body(Full::new(Bytes::new()))?,
405                    ));
406                }
407            }
408        }
409
410        // Check If-Modified-Since
411        if let Some(if_modified) = req.headers().get(header::IF_MODIFIED_SINCE) {
412            if let Ok(if_modified_str) = if_modified.to_str() {
413                if let Ok(if_modified_time) = httpdate::parse_http_date(if_modified_str) {
414                    // Only compare seconds (HTTP dates have second precision)
415                    let modified_secs = modified
416                        .duration_since(std::time::UNIX_EPOCH)
417                        .unwrap_or_default()
418                        .as_secs();
419                    let if_modified_secs = if_modified_time
420                        .duration_since(std::time::UNIX_EPOCH)
421                        .unwrap_or_default()
422                        .as_secs();
423
424                    if modified_secs <= if_modified_secs {
425                        return Ok(Some(
426                            Response::builder()
427                                .status(StatusCode::NOT_MODIFIED)
428                                .header(header::ETAG, etag)
429                                .body(Full::new(Bytes::new()))?,
430                        ));
431                    }
432                }
433            }
434        }
435
436        Ok(None)
437    }
438
439    /// Get content type for a file
440    fn get_content_type(&self, path: &Path) -> String {
441        from_path(path)
442            .first_or_octet_stream()
443            .as_ref()
444            .to_string()
445    }
446
447    /// Serve a small file (read into memory)
448    async fn serve_small_file<B>(
449        &self,
450        _req: &Request<B>,
451        file_path: &Path,
452        content_type: &str,
453        file_size: u64,
454        etag: &str,
455        modified: std::time::SystemTime,
456        encoding: ContentEncoding,
457    ) -> Result<Response<Full<Bytes>>> {
458        let content = fs::read(file_path).await?;
459        let content = Bytes::from(content);
460
461        // Compress if needed
462        let (final_content, content_encoding) = if encoding != ContentEncoding::Identity {
463            match compress_content(&content, encoding) {
464                Ok(compressed) if compressed.len() < content.len() => (compressed, Some(encoding)),
465                _ => (content.clone(), None),
466            }
467        } else {
468            (content.clone(), None)
469        };
470
471        // Cache the file
472        if file_size < MAX_CACHE_FILE_SIZE {
473            let gzip_content = if should_compress(content_type) {
474                compress_content(&content, ContentEncoding::Gzip).ok()
475            } else {
476                None
477            };
478
479            let brotli_content = if should_compress(content_type) {
480                compress_content(&content, ContentEncoding::Brotli).ok()
481            } else {
482                None
483            };
484
485            self.cache.insert(
486                file_path.to_path_buf(),
487                CachedFile {
488                    content: content.clone(),
489                    gzip_content,
490                    brotli_content,
491                    content_type: content_type.to_string(),
492                    etag: etag.to_string(),
493                    last_modified: modified,
494                    cached_at: Instant::now(),
495                    size: file_size,
496                },
497            );
498        }
499
500        let mut response = Response::builder()
501            .status(StatusCode::OK)
502            .header(header::CONTENT_TYPE, content_type)
503            .header(header::CONTENT_LENGTH, final_content.len())
504            .header(header::ACCEPT_RANGES, "bytes")
505            .header(header::ETAG, etag)
506            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
507            .header(header::CACHE_CONTROL, &self.config.cache_control);
508
509        if let Some(enc) = content_encoding {
510            response = response.header(header::CONTENT_ENCODING, enc.as_str());
511            response = response.header(header::VARY, "Accept-Encoding");
512        }
513
514        Ok(response.body(Full::new(final_content))?)
515    }
516
517    /// Serve a large file (streaming)
518    async fn serve_large_file(
519        &self,
520        file_path: &Path,
521        content_type: &str,
522        file_size: u64,
523        etag: &str,
524        modified: std::time::SystemTime,
525        _encoding: ContentEncoding,
526    ) -> Result<Response<Full<Bytes>>> {
527        // For large files, don't compress (streaming compression is complex)
528        // Just read and serve the file
529        let content = fs::read(file_path).await?;
530
531        Ok(Response::builder()
532            .status(StatusCode::OK)
533            .header(header::CONTENT_TYPE, content_type)
534            .header(header::CONTENT_LENGTH, file_size)
535            .header(header::ACCEPT_RANGES, "bytes")
536            .header(header::ETAG, etag)
537            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
538            .header(header::CACHE_CONTROL, &self.config.cache_control)
539            .body(Full::new(Bytes::from(content)))?)
540    }
541
542    /// Serve a cached file
543    fn serve_cached<B>(
544        &self,
545        req: &Request<B>,
546        cached: CachedFile,
547        encoding: ContentEncoding,
548    ) -> Result<Response<Full<Bytes>>> {
549        // Determine best content to serve based on encoding preference
550        let (content, content_encoding) =
551            match (encoding, &cached.brotli_content, &cached.gzip_content) {
552                (ContentEncoding::Brotli, Some(brotli), _) => {
553                    (brotli.clone(), Some(ContentEncoding::Brotli))
554                }
555                (ContentEncoding::Gzip, _, Some(gzip)) => {
556                    (gzip.clone(), Some(ContentEncoding::Gzip))
557                }
558                _ => (cached.content.clone(), None),
559            };
560
561        // For HEAD, return empty body
562        let body = if req.method() == Method::HEAD {
563            Bytes::new()
564        } else {
565            content
566        };
567
568        let mut response = Response::builder()
569            .status(StatusCode::OK)
570            .header(header::CONTENT_TYPE, &cached.content_type)
571            .header(header::CONTENT_LENGTH, body.len())
572            .header(header::ACCEPT_RANGES, "bytes")
573            .header(header::ETAG, &cached.etag)
574            .header(header::CACHE_CONTROL, &self.config.cache_control)
575            .header(
576                header::LAST_MODIFIED,
577                httpdate::fmt_http_date(cached.last_modified),
578            );
579
580        if let Some(enc) = content_encoding {
581            response = response.header(header::CONTENT_ENCODING, enc.as_str());
582            response = response.header(header::VARY, "Accept-Encoding");
583        }
584
585        Ok(response.body(Full::new(body))?)
586    }
587
588    /// Build HEAD response
589    fn build_head_response(
590        &self,
591        content_type: &str,
592        file_size: u64,
593        etag: &str,
594        modified: std::time::SystemTime,
595    ) -> Result<Response<Full<Bytes>>> {
596        Ok(Response::builder()
597            .status(StatusCode::OK)
598            .header(header::CONTENT_TYPE, content_type)
599            .header(header::CONTENT_LENGTH, file_size)
600            .header(header::ACCEPT_RANGES, "bytes")
601            .header(header::ETAG, etag)
602            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
603            .header(header::CACHE_CONTROL, &self.config.cache_control)
604            .body(Full::new(Bytes::new()))?)
605    }
606
607    /// Generate directory listing
608    async fn generate_directory_listing(&self, dir_path: &Path) -> Result<Response<Full<Bytes>>> {
609        let mut entries = fs::read_dir(dir_path).await?;
610        let mut items = Vec::new();
611
612        while let Some(entry) = entries.next_entry().await? {
613            let metadata = entry.metadata().await?;
614            let name = entry.file_name().to_string_lossy().to_string();
615            let is_dir = metadata.is_dir();
616            let size = if is_dir { 0 } else { metadata.len() };
617            let modified = metadata.modified()?;
618
619            items.push((name, is_dir, size, modified));
620        }
621
622        // Sort: directories first, then alphabetically
623        items.sort_by(|a, b| match (a.1, b.1) {
624            (true, false) => std::cmp::Ordering::Less,
625            (false, true) => std::cmp::Ordering::Greater,
626            _ => a.0.cmp(&b.0),
627        });
628
629        let path_display = dir_path
630            .strip_prefix(&self.config.root)
631            .unwrap_or(dir_path)
632            .display();
633
634        let mut html = format!(
635            r#"<!DOCTYPE html>
636<html lang="en">
637<head>
638    <meta charset="UTF-8">
639    <meta name="viewport" content="width=device-width, initial-scale=1.0">
640    <title>Index of /{}</title>
641    <style>
642        body {{ font-family: monospace; margin: 20px; }}
643        h1 {{ font-size: 24px; }}
644        table {{ border-collapse: collapse; }}
645        th, td {{ padding: 8px 15px; text-align: left; }}
646        th {{ background: #f0f0f0; }}
647        tr:hover {{ background: #f8f8f8; }}
648        a {{ text-decoration: none; color: #0066cc; }}
649        a:hover {{ text-decoration: underline; }}
650        .dir {{ font-weight: bold; }}
651        .size {{ text-align: right; }}
652    </style>
653</head>
654<body>
655    <h1>Index of /{}</h1>
656    <table>
657        <tr><th>Name</th><th>Size</th><th>Modified</th></tr>"#,
658            path_display, path_display
659        );
660
661        for (name, is_dir, size, modified) in items {
662            let display_name = if is_dir {
663                format!("{}/", name)
664            } else {
665                name.clone()
666            };
667            let size_str = if is_dir {
668                "-".to_string()
669            } else {
670                format_size(size)
671            };
672            let class = if is_dir { "dir" } else { "" };
673
674            html.push_str(&format!(
675                r#"<tr><td><a href="{}" class="{}">{}</a></td><td class="size">{}</td><td>{}</td></tr>"#,
676                urlencoding::encode(&name),
677                class,
678                html_escape::encode_text(&display_name),
679                size_str,
680                httpdate::fmt_http_date(modified)
681            ));
682        }
683
684        html.push_str("</table></body></html>");
685
686        Ok(Response::builder()
687            .status(StatusCode::OK)
688            .header(header::CONTENT_TYPE, "text/html; charset=utf-8")
689            .body(Full::new(Bytes::from(html)))?)
690    }
691
692    /// Generate 404 Not Found response
693    fn not_found_response(&self) -> Result<Response<Full<Bytes>>> {
694        Ok(Response::builder()
695            .status(StatusCode::NOT_FOUND)
696            .header(header::CONTENT_TYPE, "text/plain")
697            .body(Full::new(Bytes::from_static(b"404 Not Found")))?)
698    }
699}
700
701// ============================================================================
702// Helpers
703// ============================================================================
704
/// Render a byte count as a human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`). Larger values
/// are divided by 1024 until they drop below 1024 or the largest unit (TB) is
/// reached, and are printed with one decimal place (`"1.5 KB"`).
fn format_size(size: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];

    // Plain bytes are shown without a fractional part.
    if size < 1024 {
        return format!("{} {}", size, UNITS[0]);
    }

    let mut scaled = size as f64;
    let mut unit = UNITS[0];
    for next_unit in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        unit = next_unit;
    }

    format!("{:.1} {}", scaled, unit)
}
722
// Unit tests for the static file server and its helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Smoke test: a plain `GET` for an existing file returns `200 OK`.
    #[tokio::test]
    async fn test_static_file_server() {
        // The temporary directory acts as the server root for this test.
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();

        // Create test files
        std::fs::write(root.join("test.txt"), "Hello, World!").unwrap();
        std::fs::write(root.join("style.css"), "body { color: red; }").unwrap();

        let config = StaticFileConfig {
            root: root.clone(),
            index: "index.html".to_string(),
            directory_listing: true,
            cache_control: "public, max-age=3600".to_string(),
            compress: true,
            mime_types: std::collections::HashMap::new(),
            fallback: None,
        };

        let server = StaticFileServer::new(config);

        // Test serving a file
        let req = Request::builder()
            .method(Method::GET)
            .uri("/test.txt")
            .body(())
            .unwrap();

        let response = server.serve(&req, "/test.txt").await.unwrap();
        assert_eq!(response.status(), StatusCode::OK);
    }

    /// `format_size` prints whole bytes below 1024 and one decimal place above.
    #[test]
    fn test_format_size() {
        assert_eq!(format_size(0), "0 B");
        assert_eq!(format_size(512), "512 B");
        assert_eq!(format_size(1024), "1.0 KB");
        assert_eq!(format_size(1536), "1.5 KB");
        assert_eq!(format_size(1024 * 1024), "1.0 MB");
        assert_eq!(format_size(1024 * 1024 * 1024), "1.0 GB");
    }
}