sentinel_proxy/static_files/
mod.rs

1//! Static file serving module for Sentinel proxy
2//!
3//! This module provides high-performance static file serving with:
4//! - Range requests (206 Partial Content) for resumable downloads and video seeking
5//! - Zero-copy file serving using memory-mapped files for large files
6//! - On-the-fly gzip/brotli compression
7//! - In-memory caching for small files
8//! - Directory listing and SPA routing
9//!
10//! # Module Structure
11//!
12//! - [`cache`]: File caching with pre-computed compression
13//! - [`compression`]: Content encoding and compression utilities
14//! - [`range`]: HTTP Range request handling
15
16mod cache;
17mod compression;
18mod range;
19
20pub use cache::{CachedFile, CacheStats, FileCache};
21pub use compression::ContentEncoding;
22
23use anyhow::Result;
24use bytes::Bytes;
25use http::{header, Method, Request, Response, StatusCode};
26use http_body_util::Full;
27use mime_guess::from_path;
28use std::path::{Component, Path, PathBuf};
29use std::sync::Arc;
30use std::time::Instant;
31use tokio::fs;
32use tracing::warn;
33
34use sentinel_config::StaticFileConfig;
35
36use compression::{compress_content, negotiate_encoding, should_compress};
37use range::serve_range_request;
38
39// ============================================================================
40// Constants
41// ============================================================================
42
/// Smallest file (1 KiB) for which content-encoding negotiation is attempted;
/// anything below this is always served uncompressed.
const MIN_COMPRESS_SIZE: u64 = 1 << 10;

/// Exclusive upper bound (1 MiB) on the size of files kept in the in-memory cache.
const MAX_CACHE_FILE_SIZE: u64 = 1 << 20;

/// Size (10 MiB) at or above which a file is routed to the large-file serving path.
const MMAP_THRESHOLD: u64 = 10 << 20;
51
52// ============================================================================
53// Static File Server
54// ============================================================================
55
/// Static file server.
///
/// Pairs an immutable [`StaticFileConfig`] with an in-memory [`FileCache`].
/// Both are held behind `Arc`s.
pub struct StaticFileServer {
    /// Configuration for static file serving (root directory, cache-control
    /// value, compression and directory-listing switches, SPA fallback).
    config: Arc<StaticFileConfig>,
    /// Cache of small files: their contents, pre-compressed variants and
    /// response metadata.
    cache: Arc<FileCache>,
}
63
64impl StaticFileServer {
65    /// Create a new static file server
66    pub fn new(config: StaticFileConfig) -> Self {
67        let cache = Arc::new(FileCache::with_defaults());
68
69        Self {
70            config: Arc::new(config),
71            cache,
72        }
73    }
74
    /// Get cache statistics.
    ///
    /// Returns whatever the underlying [`FileCache`] reports from its
    /// `stats()` method for this server's cache.
    pub fn cache_stats(&self) -> CacheStats {
        self.cache.stats()
    }
79
    /// Clear the file cache.
    ///
    /// Delegates to the underlying [`FileCache`]'s `clear()`; the server's
    /// configuration is left untouched.
    pub fn clear_cache(&self) {
        self.cache.clear();
    }
84
85    /// Serve a static file request
86    pub async fn serve<B>(&self, req: &Request<B>, path: &str) -> Result<Response<Full<Bytes>>> {
87        // Validate request method
88        match req.method() {
89            &Method::GET | &Method::HEAD => {}
90            _ => {
91                return Ok(Response::builder()
92                    .status(StatusCode::METHOD_NOT_ALLOWED)
93                    .header(header::ALLOW, "GET, HEAD")
94                    .body(Full::new(Bytes::new()))?);
95            }
96        }
97
98        // Resolve path securely
99        let file_path = match self.resolve_path(path) {
100            Some(p) => p,
101            None => {
102                return self.not_found_response();
103            }
104        };
105
106        // Check if path is a directory
107        let metadata = match fs::metadata(&file_path).await {
108            Ok(m) => m,
109            Err(_) => {
110                // File not found - check for SPA fallback
111                if self.config.fallback.is_some() {
112                    if let Some(index_path) = self.find_spa_fallback() {
113                        let meta = fs::metadata(&index_path).await?;
114                        return self.serve_file(req, &index_path, meta).await;
115                    }
116                }
117                return self.not_found_response();
118            }
119        };
120
121        if metadata.is_dir() {
122            // Try to serve index file
123            for index_file in &["index.html", "index.htm"] {
124                let index_path = file_path.join(index_file);
125                if let Ok(index_meta) = fs::metadata(&index_path).await {
126                    if index_meta.is_file() {
127                        return self.serve_file(req, &index_path, index_meta).await;
128                    }
129                }
130            }
131
132            // Directory listing if enabled
133            if self.config.directory_listing {
134                return self.generate_directory_listing(&file_path).await;
135            }
136
137            return Ok(Response::builder()
138                .status(StatusCode::FORBIDDEN)
139                .body(Full::new(Bytes::from_static(b"Directory listing not allowed")))?);
140        }
141
142        // Serve the file
143        self.serve_file(req, &file_path, metadata).await
144    }
145
146    /// Resolve path securely
147    fn resolve_path(&self, path: &str) -> Option<PathBuf> {
148        // Remove leading slash and decode URL encoding
149        let path = path.trim_start_matches('/');
150        let decoded = urlencoding::decode(path).ok()?;
151
152        // Build path and validate it doesn't escape root
153        let mut resolved = self.config.root.clone();
154        for component in Path::new(decoded.as_ref()).components() {
155            match component {
156                Component::Normal(c) => resolved.push(c),
157                Component::ParentDir => {
158                    // Reject any path traversal attempts
159                    warn!("Path traversal attempt detected: {}", path);
160                    return None;
161                }
162                Component::CurDir => {}
163                _ => return None,
164            }
165        }
166
167        // Verify path is within root
168        if !resolved.starts_with(&self.config.root) {
169            warn!(
170                "Path escapes root directory: {:?} (root: {:?})",
171                resolved, self.config.root
172            );
173            return None;
174        }
175
176        Some(resolved)
177    }
178
179    /// Find SPA fallback index file
180    fn find_spa_fallback(&self) -> Option<PathBuf> {
181        if let Some(ref fallback) = self.config.fallback {
182            let index_path = self.config.root.join(fallback);
183            if index_path.exists() {
184                return Some(index_path);
185            }
186        }
187        None
188    }
189
190    /// Serve a file
191    async fn serve_file<B>(
192        &self,
193        req: &Request<B>,
194        file_path: &Path,
195        metadata: std::fs::Metadata,
196    ) -> Result<Response<Full<Bytes>>> {
197        let modified = metadata.modified()?;
198        let file_size = metadata.len();
199
200        // Generate ETag based on size and modification time
201        let etag = self.generate_etag_from_metadata(file_size, modified);
202
203        // Check conditional headers (If-None-Match, If-Modified-Since)
204        if let Some(response) = self.check_conditional_headers(req, &etag, modified)? {
205            return Ok(response);
206        }
207
208        // Determine content type
209        let content_type = self.get_content_type(file_path);
210
211        // Negotiate content encoding
212        let encoding = if self.config.compress
213            && should_compress(&content_type)
214            && file_size >= MIN_COMPRESS_SIZE
215        {
216            negotiate_encoding(req)
217        } else {
218            ContentEncoding::Identity
219        };
220
221        // Check for Range header
222        if let Some(range_header) = req.headers().get(header::RANGE) {
223            return serve_range_request(
224                req,
225                file_path,
226                file_size,
227                &content_type,
228                &etag,
229                modified,
230                range_header,
231                &self.config.cache_control,
232            )
233            .await;
234        }
235
236        // Check cache for small files
237        if file_size < MAX_CACHE_FILE_SIZE {
238            if let Some(cached) = self.cache.get(file_path) {
239                if cached.is_fresh() && cached.size == file_size {
240                    return self.serve_cached(req, cached, encoding);
241                }
242            }
243        }
244
245        // For HEAD requests, return headers only
246        if req.method() == Method::HEAD {
247            return self.build_head_response(&content_type, file_size, &etag, modified);
248        }
249
250        // Serve the file based on size
251        if file_size >= MMAP_THRESHOLD {
252            // Large file: stream it
253            self.serve_large_file(file_path, &content_type, file_size, &etag, modified, encoding)
254                .await
255        } else {
256            // Small/medium file: read into memory
257            self.serve_small_file(req, file_path, &content_type, file_size, &etag, modified, encoding)
258                .await
259        }
260    }
261
262    /// Generate ETag from file metadata
263    fn generate_etag_from_metadata(&self, size: u64, modified: std::time::SystemTime) -> String {
264        let modified_ts = modified
265            .duration_since(std::time::UNIX_EPOCH)
266            .unwrap_or_default()
267            .as_secs();
268        format!("\"{:x}-{:x}\"", size, modified_ts)
269    }
270
271    /// Check conditional headers and return 304 if appropriate
272    fn check_conditional_headers<B>(
273        &self,
274        req: &Request<B>,
275        etag: &str,
276        modified: std::time::SystemTime,
277    ) -> Result<Option<Response<Full<Bytes>>>> {
278        // Check If-None-Match (ETag)
279        if let Some(if_none_match) = req.headers().get(header::IF_NONE_MATCH) {
280            if let Ok(if_none_match_str) = if_none_match.to_str() {
281                // Handle multiple ETags separated by commas
282                let matches = if_none_match_str == "*"
283                    || if_none_match_str
284                        .split(',')
285                        .any(|tag| tag.trim().trim_matches('"') == etag.trim_matches('"'));
286
287                if matches {
288                    return Ok(Some(
289                        Response::builder()
290                            .status(StatusCode::NOT_MODIFIED)
291                            .header(header::ETAG, etag)
292                            .body(Full::new(Bytes::new()))?,
293                    ));
294                }
295            }
296        }
297
298        // Check If-Modified-Since
299        if let Some(if_modified) = req.headers().get(header::IF_MODIFIED_SINCE) {
300            if let Ok(if_modified_str) = if_modified.to_str() {
301                if let Ok(if_modified_time) = httpdate::parse_http_date(if_modified_str) {
302                    // Only compare seconds (HTTP dates have second precision)
303                    let modified_secs = modified
304                        .duration_since(std::time::UNIX_EPOCH)
305                        .unwrap_or_default()
306                        .as_secs();
307                    let if_modified_secs = if_modified_time
308                        .duration_since(std::time::UNIX_EPOCH)
309                        .unwrap_or_default()
310                        .as_secs();
311
312                    if modified_secs <= if_modified_secs {
313                        return Ok(Some(
314                            Response::builder()
315                                .status(StatusCode::NOT_MODIFIED)
316                                .header(header::ETAG, etag)
317                                .body(Full::new(Bytes::new()))?,
318                        ));
319                    }
320                }
321            }
322        }
323
324        Ok(None)
325    }
326
327    /// Get content type for a file
328    fn get_content_type(&self, path: &Path) -> String {
329        from_path(path)
330            .first_or_octet_stream()
331            .as_ref()
332            .to_string()
333    }
334
335    /// Serve a small file (read into memory)
336    async fn serve_small_file<B>(
337        &self,
338        _req: &Request<B>,
339        file_path: &Path,
340        content_type: &str,
341        file_size: u64,
342        etag: &str,
343        modified: std::time::SystemTime,
344        encoding: ContentEncoding,
345    ) -> Result<Response<Full<Bytes>>> {
346        let content = fs::read(file_path).await?;
347        let content = Bytes::from(content);
348
349        // Compress if needed
350        let (final_content, content_encoding) = if encoding != ContentEncoding::Identity {
351            match compress_content(&content, encoding) {
352                Ok(compressed) if compressed.len() < content.len() => (compressed, Some(encoding)),
353                _ => (content.clone(), None),
354            }
355        } else {
356            (content.clone(), None)
357        };
358
359        // Cache the file
360        if file_size < MAX_CACHE_FILE_SIZE {
361            let gzip_content = if should_compress(content_type) {
362                compress_content(&content, ContentEncoding::Gzip).ok()
363            } else {
364                None
365            };
366
367            let brotli_content = if should_compress(content_type) {
368                compress_content(&content, ContentEncoding::Brotli).ok()
369            } else {
370                None
371            };
372
373            self.cache.insert(
374                file_path.to_path_buf(),
375                CachedFile {
376                    content: content.clone(),
377                    gzip_content,
378                    brotli_content,
379                    content_type: content_type.to_string(),
380                    etag: etag.to_string(),
381                    last_modified: modified,
382                    cached_at: Instant::now(),
383                    size: file_size,
384                },
385            );
386        }
387
388        let mut response = Response::builder()
389            .status(StatusCode::OK)
390            .header(header::CONTENT_TYPE, content_type)
391            .header(header::CONTENT_LENGTH, final_content.len())
392            .header(header::ACCEPT_RANGES, "bytes")
393            .header(header::ETAG, etag)
394            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
395            .header(header::CACHE_CONTROL, &self.config.cache_control);
396
397        if let Some(enc) = content_encoding {
398            response = response.header(header::CONTENT_ENCODING, enc.as_str());
399            response = response.header(header::VARY, "Accept-Encoding");
400        }
401
402        Ok(response.body(Full::new(final_content))?)
403    }
404
405    /// Serve a large file (streaming)
406    async fn serve_large_file(
407        &self,
408        file_path: &Path,
409        content_type: &str,
410        file_size: u64,
411        etag: &str,
412        modified: std::time::SystemTime,
413        _encoding: ContentEncoding,
414    ) -> Result<Response<Full<Bytes>>> {
415        // For large files, don't compress (streaming compression is complex)
416        // Just read and serve the file
417        let content = fs::read(file_path).await?;
418
419        Ok(Response::builder()
420            .status(StatusCode::OK)
421            .header(header::CONTENT_TYPE, content_type)
422            .header(header::CONTENT_LENGTH, file_size)
423            .header(header::ACCEPT_RANGES, "bytes")
424            .header(header::ETAG, etag)
425            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
426            .header(header::CACHE_CONTROL, &self.config.cache_control)
427            .body(Full::new(Bytes::from(content)))?)
428    }
429
430    /// Serve a cached file
431    fn serve_cached<B>(
432        &self,
433        req: &Request<B>,
434        cached: CachedFile,
435        encoding: ContentEncoding,
436    ) -> Result<Response<Full<Bytes>>> {
437        // Determine best content to serve based on encoding preference
438        let (content, content_encoding) =
439            match (encoding, &cached.brotli_content, &cached.gzip_content) {
440                (ContentEncoding::Brotli, Some(brotli), _) => {
441                    (brotli.clone(), Some(ContentEncoding::Brotli))
442                }
443                (ContentEncoding::Gzip, _, Some(gzip)) => {
444                    (gzip.clone(), Some(ContentEncoding::Gzip))
445                }
446                _ => (cached.content.clone(), None),
447            };
448
449        // For HEAD, return empty body
450        let body = if req.method() == Method::HEAD {
451            Bytes::new()
452        } else {
453            content
454        };
455
456        let mut response = Response::builder()
457            .status(StatusCode::OK)
458            .header(header::CONTENT_TYPE, &cached.content_type)
459            .header(header::CONTENT_LENGTH, body.len())
460            .header(header::ACCEPT_RANGES, "bytes")
461            .header(header::ETAG, &cached.etag)
462            .header(header::CACHE_CONTROL, &self.config.cache_control)
463            .header(
464                header::LAST_MODIFIED,
465                httpdate::fmt_http_date(cached.last_modified),
466            );
467
468        if let Some(enc) = content_encoding {
469            response = response.header(header::CONTENT_ENCODING, enc.as_str());
470            response = response.header(header::VARY, "Accept-Encoding");
471        }
472
473        Ok(response.body(Full::new(body))?)
474    }
475
476    /// Build HEAD response
477    fn build_head_response(
478        &self,
479        content_type: &str,
480        file_size: u64,
481        etag: &str,
482        modified: std::time::SystemTime,
483    ) -> Result<Response<Full<Bytes>>> {
484        Ok(Response::builder()
485            .status(StatusCode::OK)
486            .header(header::CONTENT_TYPE, content_type)
487            .header(header::CONTENT_LENGTH, file_size)
488            .header(header::ACCEPT_RANGES, "bytes")
489            .header(header::ETAG, etag)
490            .header(header::LAST_MODIFIED, httpdate::fmt_http_date(modified))
491            .header(header::CACHE_CONTROL, &self.config.cache_control)
492            .body(Full::new(Bytes::new()))?)
493    }
494
495    /// Generate directory listing
496    async fn generate_directory_listing(&self, dir_path: &Path) -> Result<Response<Full<Bytes>>> {
497        let mut entries = fs::read_dir(dir_path).await?;
498        let mut items = Vec::new();
499
500        while let Some(entry) = entries.next_entry().await? {
501            let metadata = entry.metadata().await?;
502            let name = entry.file_name().to_string_lossy().to_string();
503            let is_dir = metadata.is_dir();
504            let size = if is_dir { 0 } else { metadata.len() };
505            let modified = metadata.modified()?;
506
507            items.push((name, is_dir, size, modified));
508        }
509
510        // Sort: directories first, then alphabetically
511        items.sort_by(|a, b| match (a.1, b.1) {
512            (true, false) => std::cmp::Ordering::Less,
513            (false, true) => std::cmp::Ordering::Greater,
514            _ => a.0.cmp(&b.0),
515        });
516
517        let path_display = dir_path
518            .strip_prefix(&self.config.root)
519            .unwrap_or(dir_path)
520            .display();
521
522        let mut html = format!(
523            r#"<!DOCTYPE html>
524<html lang="en">
525<head>
526    <meta charset="UTF-8">
527    <meta name="viewport" content="width=device-width, initial-scale=1.0">
528    <title>Index of /{}</title>
529    <style>
530        body {{ font-family: monospace; margin: 20px; }}
531        h1 {{ font-size: 24px; }}
532        table {{ border-collapse: collapse; }}
533        th, td {{ padding: 8px 15px; text-align: left; }}
534        th {{ background: #f0f0f0; }}
535        tr:hover {{ background: #f8f8f8; }}
536        a {{ text-decoration: none; color: #0066cc; }}
537        a:hover {{ text-decoration: underline; }}
538        .dir {{ font-weight: bold; }}
539        .size {{ text-align: right; }}
540    </style>
541</head>
542<body>
543    <h1>Index of /{}</h1>
544    <table>
545        <tr><th>Name</th><th>Size</th><th>Modified</th></tr>"#,
546            path_display, path_display
547        );
548
549        for (name, is_dir, size, modified) in items {
550            let display_name = if is_dir {
551                format!("{}/", name)
552            } else {
553                name.clone()
554            };
555            let size_str = if is_dir {
556                "-".to_string()
557            } else {
558                format_size(size)
559            };
560            let class = if is_dir { "dir" } else { "" };
561
562            html.push_str(&format!(
563                r#"<tr><td><a href="{}" class="{}">{}</a></td><td class="size">{}</td><td>{}</td></tr>"#,
564                urlencoding::encode(&name),
565                class,
566                html_escape::encode_text(&display_name),
567                size_str,
568                httpdate::fmt_http_date(modified)
569            ));
570        }
571
572        html.push_str("</table></body></html>");
573
574        Ok(Response::builder()
575            .status(StatusCode::OK)
576            .header(header::CONTENT_TYPE, "text/html; charset=utf-8")
577            .body(Full::new(Bytes::from(html)))?)
578    }
579
580    /// Generate 404 Not Found response
581    fn not_found_response(&self) -> Result<Response<Full<Bytes>>> {
582        Ok(Response::builder()
583            .status(StatusCode::NOT_FOUND)
584            .header(header::CONTENT_TYPE, "text/plain")
585            .body(Full::new(Bytes::from_static(b"404 Not Found")))?)
586    }
587}
588
589// ============================================================================
590// Helpers
591// ============================================================================
592
/// Render a byte count as a short human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values
/// are scaled by powers of 1024 and printed with one decimal place
/// (`"1.5 KB"`). The largest unit is `TB`.
fn format_size(size: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];

    let mut scaled = size as f64;
    let mut unit = 0usize;
    // Step up one unit at a time, stopping at the last unit in the table.
    for _ in 1..UNITS.len() {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        unit += 1;
    }

    match unit {
        0 => format!("{} {}", scaled as u64, UNITS[0]),
        _ => format!("{:.1} {}", scaled, UNITS[unit]),
    }
}
610
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A `GET` for an existing file under the root is answered with `200 OK`.
    #[tokio::test]
    async fn test_static_file_server() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path().to_path_buf();

        // Populate the root with a couple of fixture files.
        let fixtures = [
            ("test.txt", "Hello, World!"),
            ("style.css", "body { color: red; }"),
        ];
        for (file_name, body) in fixtures.iter() {
            std::fs::write(root.join(*file_name), *body).unwrap();
        }

        let server = StaticFileServer::new(StaticFileConfig {
            root: root.clone(),
            index: "index.html".to_string(),
            directory_listing: true,
            cache_control: "public, max-age=3600".to_string(),
            compress: true,
            mime_types: std::collections::HashMap::new(),
            fallback: None,
        });

        let request = Request::get("/test.txt").body(()).unwrap();

        let response = server.serve(&request, "/test.txt").await.unwrap();
        assert_eq!(response.status(), StatusCode::OK);
    }

    /// Byte counts are rendered with the expected unit and precision.
    #[test]
    fn test_format_size() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (512, "512 B"),
            (1024, "1.0 KB"),
            (1536, "1.5 KB"),
            (1024 * 1024, "1.0 MB"),
            (1024 * 1024 * 1024, "1.0 GB"),
        ];
        for (bytes, expected) in cases.iter() {
            assert_eq!(format_size(*bytes), *expected);
        }
    }
}
657}