hexz_server/lib.rs
1#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used, unused_results))]
2
3//! HTTP, NBD, and S3 gateway server implementations for exposing Hexz archives.
4//!
5//! This module provides network-facing interfaces for accessing compressed Hexz
6//! archive data over standard protocols. It supports three distinct serving modes:
7//!
8//! 1. **HTTP Range Server** (`serve_http`): Exposes disk and auxiliary streams via
9//! HTTP 1.1 range requests with `DoS` protection and partial content support.
10//! 2. **NBD (Network Block Device) Server** (`serve_nbd`): Allows mounting archives
11//! as Linux block devices using the standard NBD protocol.
12//! 3. **S3 Gateway** (`serve_s3_gateway`): Planned S3-compatible API for cloud
13//! integration (currently unimplemented).
14//!
15//! # Architecture Overview
16//!
17//! All servers expose the same underlying `Archive` API, which provides:
18//! - Block-level decompression with LRU caching
19//! - Dual-stream access (disk and memory archives)
20//! - Random access with minimal I/O overhead
21//! - Thread-safe concurrent reads via `Arc<Archive>`
22//!
23//! The servers differ in protocol semantics and use cases:
24//!
25//! | Protocol | Use Case | Access Pattern | Authentication |
26//! |----------|----------|----------------|----------------|
27//! | HTTP | Browser/API access | Range requests | None (planned) |
28//! | NBD | Linux block device mount | Block-level reads | None |
29//! | S3 | Cloud integration | Object API | AWS `SigV4` (planned) |
30//!
31//! # Design Decisions
32//!
33//! ## Why HTTP Range Requests?
34//!
35//! HTTP range requests (RFC 7233) provide a standardized way to access large files
36//! in chunks without loading the entire file into memory. This aligns perfectly with
37//! Hexz's block-indexed architecture, allowing clients to fetch only the data they
38//! need. The implementation:
39//!
40//! - Returns HTTP 206 (Partial Content) for range requests
41//! - Returns HTTP 416 (Range Not Satisfiable) for invalid ranges
42//! - Clamps requests to `MAX_CHUNK_SIZE` (32 MiB) to prevent memory exhaustion
43//! - Supports both bounded (`bytes=0-1023`) and unbounded (`bytes=1024-`) ranges
44//!
45//! ## Why NBD Protocol?
46//!
47//! The Network Block Device protocol allows mounting remote storage as a local block
48//! device on Linux systems. This enables:
49//! - Transparent filesystem access (mount archive, browse files)
50//! - Use of standard Linux tools (`dd`, `fsck`, `mount`)
51//! - Zero application changes (existing software works unmodified)
52//!
53//! Trade-offs:
54//! - **Pro**: Native OS integration, no special client software required
55//! - **Pro**: Kernel handles caching and buffering
56//! - **Con**: No built-in encryption or authentication
57//! - **Con**: TCP-based, higher latency than local disk
58//!
59//! ## Security Architecture
60//!
61//! ### Current Security Posture (localhost-only)
62//!
//! The server functions take an explicit bind address; the loopback address
//! (`127.0.0.1`), used throughout the examples here, prevents network exposure.
64//! This is appropriate for:
65//! - Local development and testing
66//! - Forensics workstations accessing local archives
67//! - Scenarios where network access is provided via SSH tunnels or VPNs
68//!
69//! ### Attack Surface
70//!
71//! The current implementation has a minimal attack surface:
72//! 1. **`DoS` via large reads**: Mitigated by `MAX_CHUNK_SIZE` clamping (32 MiB)
73//! 2. **Range header parsing**: Simplified parser with strict validation
74//! 3. **Connection exhaustion**: Limited by OS socket limits, no artificial cap
75//! 4. **Path traversal**: N/A (no filesystem access, only fixed `/disk` and `/memory` routes)
76//!
77//! ### Future Security Enhancements (Planned)
78//!
79//! - TLS/HTTPS support for encrypted transport
80//! - Token-based authentication (Bearer tokens)
81//! - Rate limiting per IP address
82//! - Configurable bind addresses (`0.0.0.0` for network access)
83//! - Request logging and audit trails
84//!
85//! # Performance Characteristics
86//!
87//! ## HTTP Server
88//!
89//! - **Throughput**: ~500-2000 MB/s (limited by decompression, not network)
90//! - **Latency**: ~1-5 ms per request (includes decompression)
91//! - **Concurrency**: Handles 1000+ concurrent connections (Tokio async runtime)
92//! - **Memory**: ~100 KB per connection + block cache overhead
93//!
94//! ## NBD Server
95//!
96//! - **Throughput**: ~500-1000 MB/s (similar to HTTP, plus NBD protocol overhead)
97//! - **Latency**: ~2-10 ms per block read (includes TCP RTT + decompression)
98//! - **Concurrency**: One Tokio task per client connection
99//!
100//! ## Bottlenecks
101//!
102//! For local (localhost) connections, the main bottleneck is:
103//! 1. **Decompression CPU time** (80% of latency for LZ4, more for ZSTD)
104//! 2. **Block cache misses** (requires backend I/O)
105//! 3. **Memory allocation** for large reads (mitigated by clamping)
106//!
107//! Network bandwidth is rarely a bottleneck for localhost connections.
108//!
109//! # Examples
110//!
111//! ## Starting an HTTP Server
112//!
113//! ```no_run
114//! use std::sync::Arc;
115//! use hexz_core::Archive;
116//! use hexz_store::local::FileBackend;
117//! use hexz_core::algo::compression::lz4::Lz4Compressor;
118//! use hexz_server::serve_http;
119//!
120//! # #[tokio::main]
121//! # async fn main() -> anyhow::Result<()> {
122//! let backend = Arc::new(FileBackend::new("archive.hxz".as_ref())?);
123//! let compressor = Box::new(Lz4Compressor::new());
124//! let snap = Archive::new(backend, compressor, None)?;
125//!
126//! // Start HTTP server on port 8080
127//! serve_http(snap, 8080, "127.0.0.1").await?;
128//! # Ok(())
129//! # }
130//! ```
131//!
132//! ## Starting an NBD Server
133//!
134//! ```no_run
135//! use std::sync::Arc;
136//! use hexz_core::Archive;
137//! use hexz_store::local::FileBackend;
138//! use hexz_core::algo::compression::lz4::Lz4Compressor;
139//! use hexz_server::serve_nbd;
140//!
141//! # #[tokio::main]
142//! # async fn main() -> anyhow::Result<()> {
143//! let backend = Arc::new(FileBackend::new("archive.hxz".as_ref())?);
144//! let compressor = Box::new(Lz4Compressor::new());
145//! let snap = Archive::new(backend, compressor, None)?;
146//!
147//! // Start NBD server on port 10809
148//! serve_nbd(snap, 10809, "127.0.0.1").await?;
149//! # Ok(())
150//! # }
151//! ```
152//!
153//! ## Client Usage Examples
154//!
155//! ### HTTP Client (curl)
156//!
157//! ```bash
158//! # Fetch the first 4KB of the main stream
159//! curl -H "Range: bytes=0-4095" http://localhost:8080/disk -o chunk.bin
160//!
161//! # Fetch 1MB starting at offset 1MB
162//! curl -H "Range: bytes=1048576-2097151" http://localhost:8080/memory -o mem_chunk.bin
163//!
164//! # Fetch from offset to EOF (server will clamp to MAX_CHUNK_SIZE)
165//! curl -H "Range: bytes=1048576-" http://localhost:8080/disk
166//! ```
167//!
168//! ### NBD Client (Linux)
169//!
170//! ```bash
171//! # Connect NBD client to server
172//! sudo nbd-client localhost 10809 /dev/nbd0
173//!
174//! # Mount the block device (read-only)
175//! sudo mount -o ro /dev/nbd0 /mnt/archive
176//!
177//! # Access files normally
178//! ls -la /mnt/archive
179//! cat /mnt/archive/important.log
180//!
181//! # Disconnect when done
182//! sudo umount /mnt/archive
183//! sudo nbd-client -d /dev/nbd0
184//! ```
185//!
186//! # Protocol References
187//!
188//! - **HTTP Range Requests**: [RFC 7233](https://tools.ietf.org/html/rfc7233)
189//! - **NBD Protocol**: [NBD Protocol Specification](https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md)
190//! - **S3 API**: [AWS S3 API Reference](https://docs.aws.amazon.com/s3/index.html) (future work)
191
/// NBD (Network Block Device) protocol implementation.
///
/// Exposes `handle_client`, the per-connection protocol driver invoked by
/// [`serve_nbd`] for each accepted TCP socket.
pub mod nbd;
193
194use axum::{
195 Router,
196 extract::State,
197 http::{HeaderMap, StatusCode, header},
198 response::{IntoResponse, Response},
199 routing::get,
200};
201use hexz_core::{Archive, ArchiveStream};
202use std::net::SocketAddr;
203use std::sync::Arc;
204use tokio::net::TcpListener;
205
// NOTE(review): The doc comment that originally started here described a
// bind-address constant that no longer exists in this file -- `serve_http`
// and `serve_nbd` now accept the bind address as a `bind: &str` argument.
// It has been demoted to a plain (non-doc) comment so it no longer attaches
// to `RANGE_PREFIX_LEN` in rustdoc. The security guidance is preserved
// because it still applies when choosing a bind address.
//
// # Security Rationale (bind addresses)
//
// Using the loopback address (`127.0.0.1`) prevents accidental exposure of
// archive data to the local network or internet. Archives may contain
// sensitive information (credentials, personal data, proprietary code), so
// network exposure must be an explicit, informed decision.
//
// When bound to `127.0.0.1`, servers are accessible only from the local
// machine. Remote access then requires:
// - SSH port forwarding: `ssh -L 8080:localhost:8080 user@server`
// - VPN tunnel with local forwarding
// - Reverse proxy with authentication (e.g., nginx with TLS + basic auth)
//
// ## Future Enhancement
//
// A future CLI may expose the bind address directly:
//
// ```bash
// # Proposed CLI syntax (not yet implemented)
// hexz-server --bind 0.0.0.0:8080 --auth-token mytoken123 archive.hxz
// ```
//
// Network exposure will require authentication to be enabled (enforced by the CLI).

/// Length in bytes of the HTTP `Range` header prefix `"bytes="`.
///
/// The HTTP Range header format is defined in RFC 7233 as:
///
/// ```text
/// Range: bytes=<start>-<end>
/// ```
///
/// This constant represents the length of the literal string `"bytes="` (6 bytes),
/// which is stripped during parsing. The parser supports:
///
/// - Bounded ranges: `bytes=0-1023` (fetch bytes 0 through 1023 inclusive)
/// - Unbounded ranges: `bytes=1024-` (fetch from byte 1024 to EOF)
/// - Single-byte ranges: `bytes=0-0` (fetch only byte 0)
///
/// Unsupported range types (will return HTTP 416):
/// - Suffix ranges: `bytes=-500` (last 500 bytes)
/// - Multi-part ranges: `bytes=0-100,200-300`
///
/// # Rationale for Limited Support
///
/// Suffix ranges and multi-part ranges are rarely used in practice and add
/// significant parsing complexity. If needed for browser compatibility, they
/// can be added in a future version without breaking existing clients.
const RANGE_PREFIX_LEN: usize = 6;
259
/// Maximum allowed read size per HTTP request to prevent `DoS` attacks.
///
/// Applied when serving HTTP range requests; see [`serve_http`] for the
/// protocol-level view of the clamping behavior described below.
///
/// # Value
///
/// 32 MiB (33,554,432 bytes)
///
/// # `DoS` Protection Rationale
///
/// Without a limit, a malicious client could request the entire archive in a single
/// HTTP request (e.g., `Range: bytes=0-`), forcing the server to:
///
/// 1. Decompress gigabytes of data
/// 2. Allocate gigabytes of heap memory
/// 3. Hold that memory while slowly transmitting over the network
///
/// With multiple concurrent requests, this could exhaust server memory and CPU,
/// causing crashes or unresponsiveness (denial of service).
///
/// # Why 32 MiB?
///
/// This value balances throughput efficiency and resource protection:
///
/// - **Large enough**: Clients can fetch substantial chunks with low overhead
///   (at 1 Gbps, 32 MiB transfers in ~256 ms)
/// - **Small enough**: Even 100 concurrent maximal requests consume <3.2 GB RAM,
///   which is manageable on modern servers
/// - **Common practice**: Many HTTP servers use similar limits (nginx default: 16 MiB,
///   AWS S3 max single GET: 5 GB but recommends <100 MB for performance)
///
/// # Clamping Behavior
///
/// When a client requests more than `MAX_CHUNK_SIZE` bytes:
///
/// 1. The server clamps the end offset: `end = min(end, start + MAX_CHUNK_SIZE - 1)`
/// 2. Returns HTTP 206 with the clamped range in the `Content-Range` header
/// 3. The client sees a short read and can issue follow-up requests
///
/// Example:
///
/// ```text
/// Client request: Range: bytes=0-67108863 (64 MiB)
/// Server response: Content-Range: bytes 0-33554431/total (32 MiB)
/// ```
///
/// The client must check the `Content-Range` header to detect clamping.
///
/// # Future Work
///
/// This limit could be made configurable via CLI flags for scenarios where higher
/// memory usage is acceptable (e.g., dedicated forensics servers with 128+ GB RAM).
const MAX_CHUNK_SIZE: u64 = 32 * 1024 * 1024;
311
/// Shared application state for the HTTP serving layer.
///
/// This struct is shared across HTTP request handlers (typically behind an
/// `Arc`). The inner `snap` field is itself `Arc`-wrapped, so duplicating the
/// state is cheap (just incrementing reference counts, no data copying).
///
/// # Thread Safety
///
/// `AppState` is `Send + Sync` because `Archive` is `Send + Sync`. The underlying
/// block cache uses `Mutex` for interior mutability, so multiple concurrent requests
/// can safely read from the same archive.
///
/// # Memory Overhead
///
/// Each additional `Arc` handle to this state adds ~16 bytes (one pointer plus
/// refcount bump). With 1000 concurrent connections, this overhead is
/// negligible (~16 KB).
struct AppState {
    /// The opened Hexz archive file being served via HTTP.
    ///
    /// This is the same `Archive` instance for all requests. It contains:
    /// - The storage backend (local file, S3, etc.)
    /// - Block cache (shared across all requests)
    /// - Decompressor state (pooling strategy is internal to `Archive` --
    ///   NOTE(review): "thread-local via pooling" was claimed here but is not
    ///   verifiable from this file; confirm against `hexz_core`)
    snap: Arc<Archive>,
}
337
/// Exposes an `Archive` over NBD (Network Block Device) protocol.
///
/// Starts a TCP listener on `<bind>:<port>` that implements the NBD protocol,
/// allowing Linux clients to mount the Hexz archive as a local block device
/// using standard tools like `nbd-client`.
///
/// This function runs indefinitely, accepting connections in a loop. Each client
/// connection is handled in a separate Tokio task, allowing concurrent clients.
///
/// # Arguments
///
/// - `snap`: The Hexz archive file to expose. Must be wrapped in `Arc` for sharing
///   across multiple client connections.
/// - `port`: TCP port to bind to (e.g., `10809`).
/// - `bind`: IP address to bind the listener to (e.g., `"127.0.0.1"` for
///   loopback-only access; see the security considerations below before
///   binding to anything else).
///
/// # Returns
///
/// This function never returns under normal operation (it runs forever). It only
/// returns `Err` if:
/// - The TCP listener fails to bind (port already in use, permission denied)
/// - An unrecoverable I/O error occurs on the listener socket
///
/// Individual client errors (malformed requests, disconnects) are logged but do not
/// stop the server.
///
/// # Errors
///
/// - `std::io::Error`: If binding to the socket fails or the listener encounters
///   a fatal error.
///
/// # Examples
///
/// ```no_run
/// use std::sync::Arc;
/// use hexz_core::Archive;
/// use hexz_store::local::FileBackend;
/// use hexz_core::algo::compression::lz4::Lz4Compressor;
/// use hexz_server::serve_nbd;
///
/// # #[tokio::main]
/// # async fn main() -> anyhow::Result<()> {
/// let backend = Arc::new(FileBackend::new("vm_archive.hxz".as_ref())?);
/// let compressor = Box::new(Lz4Compressor::new());
/// let snap = Archive::new(backend, compressor, None)?;
///
/// // Start NBD server (runs forever)
/// serve_nbd(snap, 10809, "127.0.0.1").await?;
/// # Ok(())
/// # }
/// ```
///
/// ## Client-Side Usage (Linux)
///
/// ```bash
/// # Connect to the NBD server
/// sudo nbd-client localhost 10809 /dev/nbd0
///
/// # Mount the block device (read-only, automatically detected filesystem)
/// sudo mount -o ro /dev/nbd0 /mnt/archive
///
/// # Browse files normally
/// ls -la /mnt/archive
/// sudo cat /mnt/archive/var/log/syslog
///
/// # Unmount and disconnect
/// sudo umount /mnt/archive
/// sudo nbd-client -d /dev/nbd0
/// ```
///
/// # Security Considerations
///
/// ## No Encryption
///
/// The NBD protocol transmits data in plaintext. For localhost connections this
/// is acceptable, but for remote access consider:
///
/// - **SSH tunnel**: `ssh -L 10809:localhost:10809 user@server`
/// - **VPN**: `WireGuard`, `OpenVPN`, etc.
/// - **TLS wrapper**: `stunnel` or similar
///
/// ## No Authentication
///
/// Any process with network access to the port can connect. Binding to the
/// loopback address mitigates this, but if exposing to the network, use
/// firewall rules or SSH key authentication.
///
/// ## Read-Only Enforcement
///
/// The NBD server exports archives as read-only (NBD flag `NBD_FLAG_READ_ONLY`;
/// NOTE(review): enforced inside the `nbd` module, not visible in this file).
/// Write attempts return `EPERM` (operation not permitted). However, a malicious
/// NBD client could theoretically attempt to crash the server via protocol abuse.
///
/// # Performance Notes
///
/// - **Concurrency**: Each client spawns a separate Tokio task. With 100 concurrent
///   clients, memory overhead is ~10 MB (100 KB per task).
/// - **Throughput**: Typically 500-1000 MB/s for sequential reads, limited by
///   decompression rather than NBD protocol overhead.
/// - **Latency**: ~2-10 ms per read, including TCP round-trip and decompression.
///
/// # Panics
///
/// This function does not panic under normal operation. Client errors are logged
/// and handled gracefully.
pub async fn serve_nbd(snap: Arc<Archive>, port: u16, bind: &str) -> anyhow::Result<()> {
    let addr: SocketAddr = format!("{bind}:{port}").parse()?;
    let listener = TcpListener::bind(addr).await?;

    tracing::info!("NBD server listening on {}", addr);
    println!(
        "NBD server started on {addr}. Use 'nbd-client localhost {port} /dev/nbd0' to mount."
    );

    loop {
        // Accept incoming NBD connections
        let (socket, remote_addr) = match listener.accept().await {
            Ok(conn) => conn,
            Err(e) => {
                // Transient accept failures (e.g., fd exhaustion) must not
                // terminate the whole server; log and keep accepting.
                tracing::warn!("NBD accept error (continuing): {}", e);
                continue;
            }
        };
        tracing::debug!("Accepted NBD connection from {}", remote_addr);

        let snap_clone = snap.clone();
        // Detached task: the JoinHandle is intentionally discarded; per-client
        // errors are logged inside the task rather than propagated.
        _ = tokio::spawn(async move {
            if let Err(e) = nbd::handle_client(socket, snap_clone).await {
                tracing::error!("NBD client error: {}", e);
            }
        });
    }
}
470
/// Exposes an `Archive` as an S3-compatible object storage gateway.
472///
473/// # Implementation Status: NOT IMPLEMENTED
474///
475/// This function is a **placeholder** for future S3 API compatibility. It currently
476/// blocks forever without serving any requests. Calling this function will NOT panic,
477/// but it provides no useful functionality.
478///
479/// # Planned Functionality
480///
481/// When implemented, this gateway will provide S3-compatible HTTP endpoints for:
482///
483/// ## Supported Operations (Planned)
484///
485/// - `GET /<bucket>/<key>`: Retrieve archive data as an S3 object
486/// - `HEAD /<bucket>/<key>`: Get object metadata (size, `ETag`)
487/// - `GET /<bucket>/<key>?range=bytes=<start>-<end>`: Partial object retrieval
488/// - `GET /<bucket>?list-type=2`: List objects (future: multi-archive support)
489///
490/// ## S3 API Compatibility Goals
491///
492/// - **Authentication**: AWS Signature Version 4 (`SigV4`) for production use
493/// - **Authorization**: IAM-style policies (read-only by default)
494/// - **Error responses**: Standard S3 XML error responses
495/// - **Metadata**: `ETag` (CRC32 of archive header), Content-Type, Last-Modified
496///
497/// ## Mapping Hexz Concepts to S3
498///
499/// | Hexz Concept | S3 Equivalent | Mapping Strategy |
500/// |----------------|---------------|------------------|
501/// | Archive file | Bucket | One bucket per archive |
502/// | Main stream | Object `disk.img` | Virtual object, synthesized from archive |
503/// | Auxiliary stream | Object `memory.img` | Virtual object, synthesized from archive |
504/// | Block index | N/A | Transparent to S3 clients |
505///
506/// ## Example S3 API Usage (Planned)
507///
508/// ```bash
509/// # Configure AWS CLI to point to local S3 gateway
510/// export AWS_ACCESS_KEY_ID=minioadmin
511/// export AWS_SECRET_ACCESS_KEY=minioadmin
512/// export AWS_ENDPOINT_URL=http://localhost:9000
513///
514/// # List buckets (archives)
515/// aws s3 ls
516///
/// # List objects in an archive
518/// aws s3 ls s3://my-archive/
519///
520/// # Download the main stream
521/// aws s3 cp s3://my-archive/disk.img disk_copy.img
522///
523/// # Download a range (100 MB starting at offset 1 GB)
524/// aws s3api get-object --bucket my-archive --key disk.img \
525/// --range bytes=1073741824-1178599423 chunk.bin
526/// ```
527///
528/// # Configuration (Planned)
529///
530/// Future configuration options (not yet implemented):
531///
532/// - **Bind address**: CLI flag `--s3-bind 0.0.0.0:9000` (default: `127.0.0.1`)
533/// - **Authentication**: `--s3-access-key` and `--s3-secret-key` for `SigV4`
534/// - **Bucket name**: `--s3-bucket-name <name>` (default: derived from archive filename)
535/// - **Anonymous access**: `--s3-allow-anonymous` flag (dangerous, for testing only)
536///
537/// # Why S3 Compatibility?
538///
539/// S3 is a de facto standard for object storage. Supporting the S3 API enables:
540///
541/// 1. **Cloud integration**: Use Hexz with existing cloud infrastructure (AWS, `MinIO`, etc.)
542/// 2. **Tool compatibility**: Any S3-compatible tool (s3cmd, rclone, boto3) works with Hexz
543/// 3. **Caching CDNs**: Front the gateway with `CloudFront` or similar for caching
544/// 4. **Lifecycle policies**: Future support for automated archive expiration
545///
546/// # Security Considerations (Planned)
547///
548/// When implemented, the S3 gateway will require authentication by default:
549///
550/// - **`SigV4` authentication**: All requests must include valid AWS Signature V4 headers
551/// - **Read-only mode**: No PUT/DELETE operations to prevent accidental modification
552/// - **Rate limiting**: Per-access-key request throttling to prevent abuse
553/// - **TLS requirement**: Production deployments must use HTTPS (enforced by CLI flag check)
554///
555/// # Performance Goals (Planned)
556///
557/// - **Throughput**: Match HTTP server performance (~500-2000 MB/s)
558/// - **Latency**: <10 ms for authenticated requests (signature verification adds ~1-2 ms)
559/// - **Concurrency**: Handle 1000+ concurrent S3 GET requests
560///
561/// # Limitations (Planned)
562///
563/// The S3 gateway will NOT support:
564///
565/// - **Write operations**: No PUT, POST, DELETE (archives are read-only)
566/// - **Multipart uploads**: N/A for read-only gateway
567/// - **Bucket policies**: Simplified IAM-like policies only
568/// - **Versioning**: Archives are immutable, no object versioning needed
569/// - **Server-side encryption**: Use TLS for transport encryption instead
570///
571/// # Arguments
572///
573/// - `_snap`: The Hexz archive to expose (currently unused).
574/// - `port`: TCP port to bind to on the loopback interface (e.g., `9000`).
575///
576/// # Returns
577///
578/// This function never returns (blocks indefinitely on `std::future::pending()`).
579/// It does not perform any useful work in the current implementation.
580///
581/// # Errors
582///
583/// Currently, this function cannot return an error (it blocks forever). In the
584/// future implementation, it will return errors for:
585///
586/// - Socket binding failures
587/// - Configuration validation errors
588/// - Unrecoverable I/O errors on the listener
589///
590/// # Examples
591///
592/// ```no_run
593/// use std::sync::Arc;
594/// use hexz_core::Archive;
595/// use hexz_store::local::FileBackend;
596/// use hexz_core::algo::compression::lz4::Lz4Compressor;
597/// use hexz_server::serve_s3_gateway;
598///
599/// # #[tokio::main]
600/// # async fn main() -> anyhow::Result<()> {
601/// let backend = Arc::new(FileBackend::new("archive.hxz".as_ref())?);
602/// let compressor = Box::new(Lz4Compressor::new());
603/// let snap = Archive::new(backend, compressor, None)?;
604///
605/// // WARNING: This will block forever without serving requests
606/// serve_s3_gateway(snap, 9000).await?;
607/// # Ok(())
608/// # }
609/// ```
610///
611/// # Implementation Roadmap
612///
613/// 1. **Phase 1**: Basic GET/HEAD operations with no authentication (localhost-only)
614/// 2. **Phase 2**: AWS `SigV4` authentication and bucket listing
615/// 3. **Phase 3**: Multi-archive support (multiple buckets)
616/// 4. **Phase 4**: TLS support and network binding options
617/// 5. **Phase 5**: IAM-style policies and access control
618///
619/// # Call for Contributions
620///
621/// Implementing S3 compatibility is a substantial undertaking. If you are interested
622/// in contributing, see `docs/s3_gateway_design.md` (to be created) for the design
623/// specification and implementation plan.
624#[deprecated(note = "Not implemented. Blocks indefinitely without serving requests.")]
625pub async fn serve_s3_gateway(_snap: Arc<Archive>, port: u16) -> anyhow::Result<()> {
626 tracing::info!("Starting S3 Gateway on port {}", port);
627 println!(
628 "S3 Gateway started on port {port} (Not fully implemented)"
629 );
630 std::future::pending::<()>().await; // Keep alive
631 unreachable!();
632}
633
/// Exposes an `Archive` over HTTP with range request support.
635///
/// Starts an HTTP 1.1 server on `<bind>:<port>` that exposes archive data via
637/// two endpoints:
638///
639/// - `GET /disk`: Serves the main stream (persistent storage archive)
640/// - `GET /memory`: Serves the auxiliary stream (RAM archive)
641///
642/// Both endpoints support HTTP range requests (RFC 7233) for partial content retrieval.
643///
644/// # Protocol Behavior
645///
646/// ## Full Content Request (No Range Header)
647///
648/// ```http
649/// GET /disk HTTP/1.1
650/// Host: localhost:8080
651/// ```
652///
653/// Response:
654///
655/// ```http
656/// HTTP/1.1 206 Partial Content
657/// Content-Type: application/octet-stream
658/// Content-Range: bytes 0-33554431/10737418240
659/// Accept-Ranges: bytes
660///
661/// [First 32 MiB of data, clamped by MAX_CHUNK_SIZE]
662/// ```
663///
664/// Note: Even without a `Range` header, the response is clamped to `MAX_CHUNK_SIZE`
665/// and returns HTTP 206 (not 200) to indicate partial content.
666///
667/// ## Range Request (Partial Content)
668///
669/// ```http
670/// GET /memory HTTP/1.1
671/// Host: localhost:8080
672/// Range: bytes=1048576-2097151
673/// ```
674///
675/// Response (success):
676///
677/// ```http
678/// HTTP/1.1 206 Partial Content
679/// Content-Type: application/octet-stream
680/// Content-Range: bytes 1048576-2097151/8589934592
681/// Accept-Ranges: bytes
682///
683/// [1 MiB of data from offset 1048576]
684/// ```
685///
686/// Response (invalid range):
687///
688/// ```http
689/// HTTP/1.1 416 Range Not Satisfiable
690/// Content-Range: bytes */8589934592
691/// ```
692///
693/// ## Error Responses
694///
695/// - **416 Range Not Satisfiable**: Invalid range syntax or out-of-bounds request
696/// - **500 Internal Server Error**: Backend I/O failure or decompression error
697///
698/// # HTTP Range Request Limitations
699///
700/// ## Supported Range Types
701///
702/// - **Bounded ranges**: `bytes=<start>-<end>` (both offsets specified)
703/// - **Unbounded ranges**: `bytes=<start>-` (from start to EOF, clamped to `MAX_CHUNK_SIZE`)
704///
705/// ## Unsupported Range Types
706///
707/// These return HTTP 416 (Range Not Satisfiable):
708///
709/// - **Suffix ranges**: `bytes=-<suffix-length>` (e.g., `bytes=-1024` for last 1KB)
710/// - **Multi-part ranges**: `bytes=0-100,200-300` (multiple ranges in one request)
711///
712/// Rationale: These are rarely used and add significant implementation complexity.
713/// Standard range requests cover 99% of real-world use cases.
714///
715/// # `DoS` Protection Mechanisms
716///
717/// ## Request Size Clamping
718///
719/// All reads are clamped to `MAX_CHUNK_SIZE` (32 MiB) to prevent memory exhaustion:
720///
721/// ```text
722/// Client requests: bytes=0-1073741823 (1 GB)
723/// Server clamps to: bytes=0-33554431 (32 MiB)
724/// Response header: Content-Range: bytes 0-33554431/total
725/// ```
726///
727/// The client detects clamping by comparing the `Content-Range` header to the
728/// requested range and can issue follow-up requests for remaining data.
729///
730/// ## Connection Limits
731///
732/// The server relies on OS-level TCP connection limits (controlled by `ulimit -n`
733/// and kernel parameters). Tokio's async runtime handles thousands of concurrent
734/// connections efficiently (each connection consumes ~100 KB of memory).
735///
736/// For production deployments, consider:
737///
738/// - **Reverse proxy**: nginx or Caddy with connection limits and rate limiting
739/// - **Firewall rules**: Limit connections per IP address
740/// - **Resource limits**: Set `ulimit -n` to a reasonable value (e.g., 4096)
741///
742/// # Arguments
743///
744/// - `snap`: The Hexz archive file to expose. Must be wrapped in `Arc` for sharing
745/// across request handlers.
/// - `port`: TCP port to bind to (e.g., `8080`, `3000`).
/// - `bind`: IP address to bind the listener to (e.g., `"127.0.0.1"` for loopback-only access).
747///
748/// # Returns
749///
750/// This function runs indefinitely, serving HTTP requests until the server is shut
751/// down (e.g., via Ctrl+C signal). It only returns `Err` if:
752///
753/// - The TCP listener fails to bind (port already in use, permission denied)
754/// - The HTTP server encounters a fatal error (should be extremely rare)
755///
756/// Individual request errors (invalid ranges, read failures) are handled gracefully
757/// and return appropriate HTTP error responses without stopping the server.
758///
759/// # Errors
760///
761/// - `std::io::Error`: If binding to the socket fails.
762/// - `anyhow::Error`: If the HTTP server encounters an unrecoverable error.
763///
764/// # Examples
765///
766/// ## Server Setup
767///
768/// ```no_run
769/// use std::sync::Arc;
770/// use hexz_core::Archive;
771/// use hexz_store::local::FileBackend;
772/// use hexz_core::algo::compression::lz4::Lz4Compressor;
773/// use hexz_server::serve_http;
774///
775/// # #[tokio::main]
776/// # async fn main() -> anyhow::Result<()> {
777/// let backend = Arc::new(FileBackend::new("archive.hxz".as_ref())?);
778/// let compressor = Box::new(Lz4Compressor::new());
779/// let snap = Archive::new(backend, compressor, None)?;
780///
781/// // Start HTTP server on port 8080 (runs forever)
782/// serve_http(snap, 8080, "127.0.0.1").await?;
783/// # Ok(())
784/// # }
785/// ```
786///
787/// ## Client Usage (curl)
788///
789/// ```bash
790/// # Fetch first 4KB of main stream
791/// curl -H "Range: bytes=0-4095" http://localhost:8080/disk -o chunk.bin
792///
793/// # Fetch 1MB starting at 1MB offset
794/// curl -H "Range: bytes=1048576-2097151" http://localhost:8080/memory -o mem_chunk.bin
795///
796/// # Fetch from offset to EOF (clamped to 32 MiB)
797/// curl -H "Range: bytes=1048576-" http://localhost:8080/disk -o large_chunk.bin
798///
799/// # Full GET (no range header, returns first 32 MiB)
800/// curl http://localhost:8080/disk -o first_32mb.bin
801/// ```
802///
803/// ## Client Usage (Python)
804///
805/// ```python
806/// import requests
807///
808/// # Fetch a range
809/// headers = {'Range': 'bytes=0-4095'}
810/// response = requests.get('http://localhost:8080/disk', headers=headers)
811/// assert response.status_code == 206 # Partial Content
812/// data = response.content
813/// print(f"Fetched {len(data)} bytes")
814///
815/// # Parse Content-Range header
816/// content_range = response.headers['Content-Range']
817/// # Example: "bytes 0-4095/10737418240"
818/// print(f"Content-Range: {content_range}")
819/// ```
820///
821/// # Performance Characteristics
822///
823/// ## Throughput
824///
825/// - **Local (127.0.0.1)**: 500-2000 MB/s (limited by decompression, not HTTP overhead)
826/// - **1 Gbps network**: ~120 MB/s (network-bound)
827/// - **10 Gbps network**: ~800 MB/s (may be decompression-bound for LZ4, network-bound for ZSTD)
828///
829/// ## Latency
830///
831/// - **Cache hit**: ~80μs (block already decompressed)
832/// - **Cache miss**: ~1-5 ms (includes decompression and backend I/O)
833/// - **Network RTT**: Add local RTT (~0.1 ms for localhost, ~10-50 ms for remote)
834///
835/// ## Memory Usage
836///
837/// - **Per connection**: ~100 KB (Tokio task stack + buffers)
838/// - **Per request**: ~32 MB worst-case (if requesting `MAX_CHUNK_SIZE`)
839/// - **Block cache**: Shared across all connections (typically 100-500 MB)
840///
841/// With 1000 concurrent connections, memory overhead is ~100 MB for connections
842/// plus the shared block cache.
843///
844/// # Security Considerations
845///
846/// ## Current Security Posture
847///
848/// - **Localhost-only**: Binds to `127.0.0.1`, not accessible from network
849/// - **No authentication**: Anyone with local access can read archive data
850/// - **No TLS**: Plaintext HTTP (acceptable for loopback)
851/// - **`DoS` protection**: Request size clamping, but no rate limiting
852///
853/// ## Threat Model
854///
855/// For localhost-only deployments, the threat model assumes:
856///
857/// 1. **Trusted local environment**: All local users are trusted (or isolated via OS permissions)
858/// 2. **No remote attackers**: Firewall prevents external access
859/// 3. **Process isolation**: Archive data is not more sensitive than other local files
860///
861/// ## Future Security Enhancements (Planned)
862///
863/// - **TLS/HTTPS**: Certificate-based encryption for network access
864/// - **Bearer token auth**: Simple token in `Authorization` header
865/// - **Rate limiting**: Per-IP request throttling
866/// - **Audit logging**: Request logs with client IP and byte ranges
867///
868/// # Panics
869///
870/// This function does not panic under normal operation. Request handling errors
871/// are converted to HTTP error responses.
872pub async fn serve_http(snap: Arc<Archive>, port: u16, bind: &str) -> anyhow::Result<()> {
873 let addr: SocketAddr = format!("{bind}:{port}").parse()?;
874 let listener = TcpListener::bind(addr).await?;
875 tracing::info!("HTTP server listening on {}", addr);
876 serve_http_with_listener(snap, listener).await
877}
878
879/// Like [`serve_http`], but accepts a pre-bound [`TcpListener`].
880///
881/// This avoids a TOCTOU race when the caller needs to discover a free port
882/// (bind to port 0) and then pass the listener directly instead of
883/// re-binding by port number.
884pub async fn serve_http_with_listener(
885 snap: Arc<Archive>,
886 listener: TcpListener,
887) -> anyhow::Result<()> {
888 let state = Arc::new(AppState { snap });
889
890 let app = Router::new()
891 .route("/disk", get(get_disk))
892 .route("/memory", get(get_memory))
893 .with_state(state);
894
895 axum::serve(listener, app).await?;
896 Ok(())
897}
898
899/// HTTP handler for the `/disk` endpoint.
900///
901/// Serves the main stream (persistent storage archive) from the Hexz file.
902/// Delegates to `handle_request` with `ArchiveStream::Main`.
903///
904/// # Route
905///
906/// `GET /disk`
907///
908/// # Request Headers
909///
910/// - `Range` (optional): HTTP range request (e.g., `bytes=0-4095`)
911///
912/// # Response Headers
913///
914/// - `Content-Type`: Always `application/octet-stream` (raw binary data)
915/// - `Content-Range`: Byte range served (e.g., `bytes 0-4095/10737418240`)
916/// - `Accept-Ranges`: Always `bytes` (indicates range request support)
917///
918/// # Response Status Codes
919///
920/// - **206 Partial Content**: Successful range request
921/// - **416 Range Not Satisfiable**: Invalid or out-of-bounds range
922/// - **500 Internal Server Error**: Archive read failure
923///
924/// # Examples
925///
926/// See `serve_http` for client usage examples.
927async fn get_disk(headers: HeaderMap, State(state): State<Arc<AppState>>) -> impl IntoResponse {
928 handle_request(&headers, &state.snap, ArchiveStream::Main)
929}
930
931/// HTTP handler for the `/memory` endpoint.
932///
933/// Serves the auxiliary stream (RAM archive) from the Hexz file.
934/// Delegates to `handle_request` with `ArchiveStream::Auxiliary`.
935///
936/// # Route
937///
938/// `GET /memory`
939///
940/// # Request Headers
941///
942/// - `Range` (optional): HTTP range request (e.g., `bytes=0-4095`)
943///
944/// # Response Headers
945///
946/// - `Content-Type`: Always `application/octet-stream` (raw binary data)
947/// - `Content-Range`: Byte range served (e.g., `bytes 0-4095/8589934592`)
948/// - `Accept-Ranges`: Always `bytes` (indicates range request support)
949///
950/// # Response Status Codes
951///
952/// - **206 Partial Content**: Successful range request
953/// - **416 Range Not Satisfiable**: Invalid or out-of-bounds range
954/// - **500 Internal Server Error**: Archive read failure
955///
956/// # Examples
957///
958/// See `serve_http` for client usage examples.
959async fn get_memory(headers: HeaderMap, State(state): State<Arc<AppState>>) -> impl IntoResponse {
960 handle_request(&headers, &state.snap, ArchiveStream::Auxiliary)
961}
962
963/// Core HTTP request handler that translates `Range` headers into archive reads.
964///
965/// This function implements the HTTP range request logic for both `/disk` and `/memory`
966/// endpoints. It performs the following steps:
967///
968/// 1. Parse the `Range` header (if present) or default to full stream access
969/// 2. Clamp the requested range to `MAX_CHUNK_SIZE` to prevent `DoS`
970/// 3. Read the data from the archive via `Archive::read_at`
971/// 4. Return HTTP 206 with `Content-Range` header, or error status codes
972///
973/// # Arguments
974///
975/// - `headers`: HTTP request headers from the client (parsed by Axum)
976/// - `snap`: The Hexz archive file to read from
977/// - `stream`: Which logical stream to read (`Disk` or `Memory`)
978///
979/// # Returns
980///
981/// An Axum `Response` with one of the following status codes:
982///
983/// - **206 Partial Content**: Successful read (even for full stream requests)
984/// - **416 Range Not Satisfiable**: Invalid range syntax or out-of-bounds offset
985/// - **500 Internal Server Error**: Archive read failure (decompression error, I/O error)
986///
987/// # HTTP Range Request Parsing
988///
989/// The `Range` header is expected in the format `bytes=<start>-<end>` where:
990///
991/// - `<start>` is the starting byte offset (inclusive, zero-indexed)
992/// - `<end>` is the ending byte offset (inclusive), or omitted for "to EOF"
993///
994/// ## Examples of Supported Ranges
995///
996/// ```text
997/// Range: bytes=0-1023 → Read bytes 0-1023 (1024 bytes)
998/// Range: bytes=1024-2047 → Read bytes 1024-2047 (1024 bytes)
999/// Range: bytes=1048576- → Read from 1MB to EOF (clamped to MAX_CHUNK_SIZE)
1000/// (no Range header) → Read from start to EOF (clamped to MAX_CHUNK_SIZE)
1001/// ```
1002///
1003/// ## Examples of Unsupported/Invalid Ranges
1004///
1005/// These return HTTP 416:
1006///
1007/// ```text
1008/// Range: bytes=-1024 → Suffix range (last 1024 bytes) - not supported
1009/// Range: bytes=0-100,200-300 → Multi-part range - not supported
1010/// Range: bytes=1000-500 → Start > end - invalid
1011/// Range: bytes=999999999999- → Start beyond EOF - out of bounds
1012/// ```
1013///
1014/// # `DoS` Protection: Range Clamping Algorithm
1015///
1016/// To prevent a malicious client from requesting gigabytes of data in a single
1017/// request, the handler clamps the effective range:
1018///
1019/// ```text
1020/// requested_length = end - start + 1
1021/// if requested_length > MAX_CHUNK_SIZE:
1022/// end = start + MAX_CHUNK_SIZE - 1
1023/// if end >= total_size:
1024/// end = total_size - 1
1025/// ```
1026///
1027/// The clamped range is reflected in the `Content-Range` response header:
1028///
1029/// ```text
1030/// Content-Range: bytes <actual_start>-<actual_end>/<total_size>
1031/// ```
1032///
1033/// Clients must check this header to detect clamping and issue follow-up requests
1034/// for remaining data.
1035///
1036/// ## Clamping Example
1037///
1038/// ```text
1039/// Client request: Range: bytes=0-67108863 (64 MiB)
1040/// Total size: 10 GB
1041/// Server clamps to: 0-33554431 (32 MiB due to MAX_CHUNK_SIZE)
1042/// Response header: Content-Range: bytes 0-33554431/10737418240
1043/// ```
1044///
1045/// # Error Handling
1046///
1047/// ## Range Parsing Errors
1048///
1049/// If `parse_range` returns `None`, the handler returns HTTP 416 (Range Not
1050/// Satisfiable). This occurs when:
1051///
1052/// - The `Range` header does not start with `"bytes="`
1053/// - The start/end offsets are not valid integers
1054/// - The start offset is greater than the end offset
1055/// - The end offset is beyond the stream size
1056///
1057/// ## Archive Read Errors
1058///
1059/// If `snap.read_at` returns `Err(_)`, the handler returns HTTP 500 (Internal
1060/// Server Error). This occurs when:
1061///
1062/// - Decompression fails (corrupted compressed data)
1063/// - Backend I/O fails (disk error, network timeout for remote backends)
1064/// - Encryption decryption fails (incorrect key, corrupted ciphertext)
1065///
1066/// The specific error is not exposed to the client (only logged internally) to
1067/// avoid information leakage.
1068///
1069/// # Edge Cases
1070///
1071/// ## Empty Range
1072///
1073/// If the calculated range length is 0 (e.g., due to clamping at EOF), the handler
1074/// returns HTTP 416. This should be rare in practice since clients typically request
1075/// valid ranges.
1076///
1077/// ## Zero-Sized Stream
1078///
1079/// If the archive stream size is 0 (empty disk or memory archive), any range
1080/// request returns HTTP 416 because no valid offsets exist.
1081///
1082/// ## Single-Byte Range
1083///
1084/// A request like `bytes=0-0` (fetch only byte 0) is valid and returns 1 byte with
1085/// HTTP 206 and `Content-Range: bytes 0-0/<total>`.
1086///
1087/// # Performance Characteristics
1088///
1089/// - **No Range Header**: Clamps to `MAX_CHUNK_SIZE`, then performs one `read_at` call
1090/// - **Valid Range**: One `read_at` call (may hit block cache or require decompression)
1091/// - **Invalid Range**: Immediate return (no archive I/O)
1092///
1093/// For cache hits, latency is ~80μs. For cache misses, latency is ~1-5 ms depending
1094/// on backend speed and compression algorithm.
1095///
1096/// # Security Notes
1097///
1098/// - **No authentication**: This function does not check credentials (handled by
1099/// future middleware or reverse proxy)
1100/// - **`DoS` mitigation**: Request size clamping prevents memory exhaustion
1101/// - **Information leakage**: Error responses do not reveal internal details
1102/// (e.g., "decompression failed" is hidden behind HTTP 500)
1103///
1104/// # Examples
1105///
1106/// See `serve_http`, `get_disk`, and `get_memory` for usage context.
1107fn handle_request(headers: &HeaderMap, snap: &Arc<Archive>, stream: ArchiveStream) -> Response {
1108 let total_size = snap.size(stream);
1109
1110 let (start, mut end) = if let Some(range) = headers.get(header::RANGE) {
1111 match parse_range(range.to_str().unwrap_or(""), total_size) {
1112 Some(r) => r,
1113 None => return StatusCode::RANGE_NOT_SATISFIABLE.into_response(),
1114 }
1115 } else {
1116 (0, total_size.saturating_sub(1))
1117 };
1118
1119 // SECURITY: DoS Protection
1120 // Clamp the requested range to avoid huge memory allocations.
1121 if end - start + 1 > MAX_CHUNK_SIZE {
1122 end = start + MAX_CHUNK_SIZE - 1;
1123 // Ensure we don't go past EOF after clamping
1124 if end >= total_size {
1125 end = total_size.saturating_sub(1);
1126 }
1127 }
1128
1129 let len = (end - start + 1) as usize;
1130 if len == 0 {
1131 // Handle empty range edge case
1132 return StatusCode::RANGE_NOT_SATISFIABLE.into_response();
1133 }
1134
1135 match snap.read_at(stream, start, len) {
1136 Ok(data) => (
1137 StatusCode::PARTIAL_CONTENT,
1138 [
1139 (header::CONTENT_TYPE, "application/octet-stream"),
1140 (
1141 header::CONTENT_RANGE,
1142 &format!("bytes {start}-{end}/{total_size}"),
1143 ),
1144 (header::ACCEPT_RANGES, "bytes"),
1145 ],
1146 data,
1147 )
1148 .into_response(),
1149 Err(_) => StatusCode::INTERNAL_SERVER_ERROR.into_response(),
1150 }
1151}
1152
/// Parses an HTTP `Range` header into absolute byte offsets.
///
/// Implements a subset of HTTP range request syntax (RFC 7233), supporting only
/// single simple byte ranges without multi-part or suffix forms.
///
/// # Supported Syntax
///
/// - **Bounded range**: `bytes=<start>-<end>` (both offsets specified)
///   - Example: `bytes=0-1023` → Returns `(0, 1023)`
/// - **Unbounded range**: `bytes=<start>-` (from start to EOF)
///   - Example: `bytes=1024-` → Returns `(1024, size-1)`
///
/// # Unsupported Syntax
///
/// - **Suffix range**: `bytes=-<length>` (last N bytes)
///   - Example: `bytes=-1024` → Returns `None`
/// - **Multi-part range**: `bytes=0-100,200-300`
///   - Example: `bytes=0-100,200-300` → Returns `None`
/// - **Malformed specs**: missing `-` separator or extra separators
///   - Examples: `bytes=100` → `None`, `bytes=0-10-20` → `None`
///
/// Suffix and multi-part ranges are rejected because:
/// 1. They are rarely used in practice (<1% of range requests)
/// 2. They add significant parsing and response generation complexity
/// 3. The HTTP 416 error response is acceptable for clients that need them
///
/// # Arguments
///
/// - `range`: The value of the `Range` header (e.g., `"bytes=0-1023"`)
/// - `size`: The total size of the stream in bytes (used to validate offsets)
///
/// # Returns
///
/// - `Some((start, end))`: Valid range with absolute byte offsets (both inclusive)
/// - `None`: Invalid syntax or out-of-bounds range
///
/// # Error Conditions
///
/// Returns `None` if:
///
/// 1. **Missing prefix**: Header does not start with `"bytes="`
///    - Example: `"items=0-100"` → Error
/// 2. **Missing separator**: No `-` between the offsets
///    - Example: `"bytes=100"` → Error
/// 3. **Invalid integer**: Start or end cannot be parsed as `u64`
///    - Example: `"bytes=abc-def"` → Error
/// 4. **Inverted range**: Start offset is greater than end offset
///    - Example: `"bytes=1000-500"` → Error
/// 5. **Out of bounds**: End offset is beyond the stream size
///    - Example: `"bytes=0-999999"` when size is 1000 → Error
///
/// # Parsing Algorithm
///
/// ```text
/// 1. Strip the "bytes=" prefix (reject if absent)
/// 2. Split the remainder at the first '-' (reject if no '-')
/// 3. Parse start offset; an empty start (suffix form) fails the parse
/// 4. Parse end offset, or default to size-1 when omitted; any extra '-'
///    or ',' characters remain in the end part and fail the parse
/// 5. Validate: start <= end && end < size
/// 6. Return (start, end)
/// ```
///
/// # Edge Cases
///
/// - `bytes=` → `None` (no separator, no start offset)
/// - `bytes=0-0` → `Some((0, 0))` (valid, requests exactly 1 byte)
/// - `bytes=0-999` with `size = 1000` → `Some((0, 999))` (end is inclusive)
/// - `bytes=0-1000` with `size = 1000` → `None` (offset 1000 does not exist)
/// - Any spec with `size = 0` → `None` (no valid offsets exist)
///
/// # Examples
///
/// ```text
/// parse_range("bytes=0-1023", 10000)  -> Some((0, 1023))
/// parse_range("bytes=1024-", 10000)   -> Some((1024, 9999))
/// parse_range("0-1023", 10000)        -> None // missing "bytes=" prefix
/// parse_range("bytes=0-10000", 10000) -> None // out of bounds
/// parse_range("bytes=1000-500", 10000)-> None // inverted range
/// ```
///
/// # Performance
///
/// - **Time complexity**: O(n) where n is the length of the range string (typically <20 chars)
/// - **Allocation**: None (`strip_prefix`/`split_once` return subslices)
/// - **Typical latency**: <1 μs (negligible compared to archive read latency)
///
/// # Security
///
/// This function is resilient to malicious input:
///
/// - **Integer overflow**: `u64` parsing rejects values above `u64::MAX`
/// - **Unbounded length**: The `Range` header is bounded by HTTP header size limits
///   (typically 8 KB, enforced by the HTTP server)
/// - **No allocation attacks**: The parser performs no heap allocation
pub fn parse_range(range: &str, size: u64) -> Option<(u64, u64)> {
    // Reject anything that is not a byte-range spec.
    let spec = range.strip_prefix("bytes=")?;

    // Exactly one '-' is expected between the offsets. split_once splits at
    // the first '-', so any extra separators (or a ',' from a multi-part
    // spec) end up inside `end_part` and fail the integer parse below.
    // A suffix form like "-1024" yields an empty `start_part`, which also
    // fails to parse.
    let (start_part, end_part) = spec.split_once('-')?;
    let start = start_part.parse::<u64>().ok()?;

    // An omitted end offset means "to EOF" (inclusive last byte).
    let end = if end_part.is_empty() {
        size.saturating_sub(1)
    } else {
        end_part.parse::<u64>().ok()?
    };

    // Both offsets are inclusive: the range must be non-inverted and must
    // fall entirely within the stream. This also rejects size == 0, since
    // no valid offset exists in an empty stream.
    if start > end || end >= size {
        return None;
    }
    Some((start, end))
}