Skip to main content

specter/transport/h2/
hpack.rs

//! HPACK header compression with custom pseudo-header ordering.
//!
//! This module provides a custom HPACK implementation with:
//! - Custom pseudo-header ordering (Chrome uses `:method, :scheme, :authority, :path`)
//! - Full control over header encoding for fingerprint accuracy
//! - Complete Huffman encoding support
7
8use crate::transport::h2::hpack_impl::{Decoder, Encoder};
9use bytes::Bytes;
10
/// Pseudo-header ordering for HTTP/2 fingerprinting.
///
/// Different browsers/clients send pseudo-headers in different orders.
/// This order is visible in the Akamai HTTP/2 fingerprint.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum PseudoHeaderOrder {
    /// Chrome order: :method, :scheme, :authority, :path (m,s,a,p)
    #[default]
    Chrome,
    /// Firefox order: :method, :path, :authority, :scheme (m,p,a,s)
    Firefox,
    /// Safari order: :method, :scheme, :path, :authority (m,s,p,a)
    Safari,
    /// Legacy order: :method, :authority, :scheme, :path (m,a,s,p)
    Standard,
    /// Custom order specified by indices (0=method, 1=authority, 2=scheme, 3=path).
    ///
    /// The array must be a permutation of `0..=3`: each pseudo-header listed
    /// exactly once, in the order it should be emitted. Any other value (an
    /// index above 3, or a repeated index) is resolved as the `Standard`
    /// order instead of producing an out-of-bounds lookup or a header block
    /// with duplicated/missing pseudo-headers.
    Custom([u8; 4]),
}

impl PseudoHeaderOrder {
    /// Get the order as array indices.
    ///
    /// The indices refer to the canonical slot layout
    /// `[method(0), authority(1), scheme(2), path(3)]`; the returned array
    /// lists which slot to emit first, second, third and fourth.
    ///
    /// The result is always a permutation of `0..=3`. A `Custom` order that
    /// is not such a permutation yields the `Standard` order `[0, 1, 2, 3]`.
    fn order(&self) -> [usize; 4] {
        match self {
            // Chrome: m,s,a,p
            Self::Chrome => [0, 2, 1, 3],
            // Firefox: m,p,a,s
            Self::Firefox => [0, 3, 1, 2],
            // Safari: m,s,p,a
            Self::Safari => [0, 2, 3, 1],
            // Legacy: m,a,s,p (old incorrect Chrome assumption)
            Self::Standard => [0, 1, 2, 3],
            Self::Custom(order) => {
                let indices = (*order).map(usize::from);

                // Mark each slot as it is seen: `get_mut` fails for an index
                // above 3, and an already-marked slot means a repeated index.
                let mut seen = [false; 4];
                let is_permutation = indices.iter().all(|&idx| match seen.get_mut(idx) {
                    Some(slot) if !*slot => {
                        *slot = true;
                        true
                    }
                    _ => false,
                });

                if is_permutation {
                    indices
                } else {
                    [0, 1, 2, 3]
                }
            }
        }
    }

    /// Get the Akamai fingerprint string for this order.
    ///
    /// The built-in orders return their comma-separated initials
    /// (for example `"m,s,a,p"` for Chrome); every `Custom` order returns
    /// the fixed string `"custom"`.
    pub fn akamai_string(&self) -> &'static str {
        match self {
            Self::Chrome => "m,s,a,p",
            Self::Firefox => "m,p,a,s",
            Self::Safari => "m,s,p,a",
            Self::Standard => "m,a,s,p",
            Self::Custom(_) => "custom",
        }
    }
}
64
65/// HPACK encoder with custom pseudo-header ordering.
66pub struct HpackEncoder {
67    encoder: Encoder,
68    pseudo_order: PseudoHeaderOrder,
69}
70
71impl HpackEncoder {
72    /// Create a new encoder with the specified pseudo-header order.
73    pub fn new(pseudo_order: PseudoHeaderOrder) -> Self {
74        Self {
75            encoder: Encoder::new(),
76            pseudo_order,
77        }
78    }
79
80    /// Create encoder with Chrome pseudo-header order (default).
81    pub fn chrome() -> Self {
82        Self::new(PseudoHeaderOrder::Chrome)
83    }
84
85    /// Set the dynamic table size.
86    pub fn set_max_table_size(&mut self, size: usize) {
87        self.encoder.set_max_table_size(size);
88    }
89
90    /// Encode headers for an HTTP/2 request.
91    ///
92    /// Pseudo-headers are ordered according to the configured order.
93    /// Regular headers follow in the order provided.
94    pub fn encode_request(
95        &mut self,
96        method: &str,
97        scheme: &str,
98        authority: &str,
99        path: &str,
100        headers: &[(String, String)],
101    ) -> Bytes {
102        // Build pseudo-headers in configured order
103        let pseudo_headers: [(&[u8], &[u8]); 4] = [
104            (b":method", method.as_bytes()),
105            (b":authority", authority.as_bytes()),
106            (b":scheme", scheme.as_bytes()),
107            (b":path", path.as_bytes()),
108        ];
109
110        // Collect all headers in the correct order
111        let mut all_headers: Vec<(&[u8], &[u8])> = Vec::new();
112
113        // Storage for processed valid headers (lowercased name, value ref)
114        // We need this intermediate storage to ensure the Strings live long enough
115        // and to avoid borrow checker issues (references into a growing Vec).
116        let mut valid_headers: Vec<(String, &str)> = Vec::with_capacity(headers.len());
117
118        // Filter and process headers first
119        for (name, value) in headers {
120            // Skip any pseudo-headers that were incorrectly passed in
121            if name.starts_with(':') {
122                continue;
123            }
124
125            // RFC 9113 Section 8.1.2: Validate header name
126            if name.is_empty() {
127                continue;
128            }
129            if name
130                .as_bytes()
131                .iter()
132                .any(|&b| b < 0x21 || (b > 0x7E && b != 0x7F))
133            {
134                continue;
135            }
136
137            // HTTP/2 requires header names to be lowercase
138            let name_lower = name.to_lowercase();
139
140            // Skip connection-specific headers forbidden in HTTP/2
141            if name_lower == "connection"
142                || name_lower == "keep-alive"
143                || name_lower == "proxy-connection"
144                || name_lower == "transfer-encoding"
145                || name_lower == "upgrade"
146            {
147                continue;
148            }
149
150            // RFC 9113 Section 8.1.2.2: TE header allowed ONLY if value is "trailers"
151            if name_lower == "te" && value.to_lowercase() != "trailers" {
152                continue;
153            }
154
155            valid_headers.push((name_lower, value));
156        }
157
158        // Add pseudo-headers in the specified order
159        let order = self.pseudo_order.order();
160        for &idx in &order {
161            all_headers.push(pseudo_headers[idx]);
162        }
163
164        // Add regular headers from the validated list
165        for (n, v) in &valid_headers {
166            all_headers.push((n.as_bytes(), v.as_bytes()));
167        }
168
169        // Encode all headers
170        let encoded = self.encoder.encode(&all_headers);
171        Bytes::from(encoded)
172    }
173
174    /// Encode RFC 8441 Extended CONNECT headers for WebSocket over HTTP/2.
175    ///
176    /// The pseudo-header order is deterministic and spec-compliant for RFC 8441;
177    /// it is not claimed to be Chrome-exact.
178    pub fn encode_extended_connect_websocket(
179        &mut self,
180        authority: &str,
181        scheme: &str,
182        path: &str,
183        headers: &[(String, String)],
184    ) -> Result<Bytes, String> {
185        if authority.is_empty() {
186            return Err(":authority must not be empty".to_string());
187        }
188        if scheme.is_empty() {
189            return Err(":scheme must not be empty".to_string());
190        }
191        if path.is_empty() {
192            return Err(":path must not be empty".to_string());
193        }
194
195        let pseudo_headers: [(&[u8], &[u8]); 5] = [
196            (b":method", b"CONNECT"),
197            (b":protocol", b"websocket"),
198            (b":scheme", scheme.as_bytes()),
199            (b":path", path.as_bytes()),
200            (b":authority", authority.as_bytes()),
201        ];
202
203        let mut valid_headers: Vec<(String, &str)> = Vec::with_capacity(headers.len());
204
205        for (name, value) in headers {
206            if name.starts_with(':') {
207                return Err(format!("RFC 8441 user pseudo-header rejected: {name}"));
208            }
209
210            if name.is_empty() {
211                return Err("RFC 8441 header name must not be empty".to_string());
212            }
213            if name
214                .as_bytes()
215                .iter()
216                .any(|&b| b < 0x21 || (b > 0x7E && b != 0x7F))
217            {
218                return Err(format!("RFC 8441 invalid header name rejected: {name}"));
219            }
220
221            let name_lower = name.to_lowercase();
222            if matches!(
223                name_lower.as_str(),
224                "connection"
225                    | "upgrade"
226                    | "host"
227                    | "sec-websocket-key"
228                    | "sec-websocket-accept"
229                    | "sec-websocket-extensions"
230                    | "keep-alive"
231                    | "proxy-connection"
232                    | "transfer-encoding"
233            ) {
234                return Err(format!("RFC 8441 forbidden header rejected: {name_lower}"));
235            }
236
237            if name_lower == "te" && value.to_lowercase() != "trailers" {
238                return Err("RFC 8441 forbids TE values other than trailers".to_string());
239            }
240
241            valid_headers.push((name_lower, value));
242        }
243
244        let mut all_headers: Vec<(&[u8], &[u8])> =
245            Vec::with_capacity(pseudo_headers.len() + valid_headers.len());
246        all_headers.extend_from_slice(&pseudo_headers);
247        for (name, value) in &valid_headers {
248            all_headers.push((name.as_bytes(), value.as_bytes()));
249        }
250
251        let encoded = self.encoder.encode(&all_headers);
252        Ok(Bytes::from(encoded))
253    }
254
255    /// Split an encoded header block into chunks if it exceeds max_frame_size.
256    /// Returns (first_chunk, remaining_chunks).
257    ///
258    /// This is used when header blocks exceed MAX_FRAME_SIZE and must be
259    /// split across HEADERS + CONTINUATION frames per RFC 9113 Section 6.10.
260    ///
261    /// Use this after calling encode_request() to chunk the result if needed.
262    pub fn chunk_encoded(encoded: Bytes, max_frame_size: usize) -> (Bytes, Vec<Bytes>) {
263        if encoded.len() <= max_frame_size {
264            // Fits in single frame
265            return (encoded, Vec::new());
266        }
267
268        // Split into chunks
269        let mut chunks: Vec<Bytes> = encoded
270            .chunks(max_frame_size)
271            .map(Bytes::copy_from_slice)
272            .collect();
273
274        let first = chunks.remove(0);
275        (first, chunks)
276    }
277}
278
279/// HPACK decoder.
280pub struct HpackDecoder {
281    decoder: Decoder,
282}
283
284impl HpackDecoder {
285    /// Create a new decoder.
286    pub fn new() -> Self {
287        Self {
288            decoder: Decoder::new(),
289        }
290    }
291
292    /// Set the maximum dynamic table size.
293    pub fn set_max_table_size(&mut self, size: usize) {
294        self.decoder.set_max_table_size(size);
295    }
296
297    /// Decode a header block into a list of headers.
298    pub fn decode(&mut self, data: &[u8]) -> Result<Vec<(String, String)>, String> {
299        let mut headers = Vec::new();
300
301        self.decoder
302            .decode_with_cb(data, |name, value| {
303                let name_str = String::from_utf8_lossy(name).into_owned();
304                let value_str = String::from_utf8_lossy(value).into_owned();
305                headers.push((name_str, value_str));
306            })
307            .map_err(|e| format!("HPACK decode error: {:?}", e))?;
308
309        Ok(headers)
310    }
311}
312
313impl Default for HpackDecoder {
314    fn default() -> Self {
315        Self::new()
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes a request with `encoder` and decodes it again with a fresh decoder.
    fn round_trip(
        encoder: &mut HpackEncoder,
        headers: &[(String, String)],
    ) -> (Bytes, Vec<(String, String)>) {
        let block = encoder.encode_request("GET", "https", "example.com", "/", headers);
        let decoded = HpackDecoder::new().decode(&block).unwrap();
        (block, decoded)
    }

    /// Builds an owned header pair from string literals.
    fn header(name: &str, value: &str) -> (String, String) {
        (name.to_string(), value.to_string())
    }

    #[test]
    fn test_pseudo_order_chrome() {
        assert_eq!(PseudoHeaderOrder::Chrome.akamai_string(), "m,s,a,p");
    }

    #[test]
    fn test_pseudo_order_standard() {
        assert_eq!(PseudoHeaderOrder::Standard.akamai_string(), "m,a,s,p");
    }

    #[test]
    fn test_encoder_creates_valid_block() {
        let mut encoder = HpackEncoder::chrome();
        let (block, headers) = round_trip(&mut encoder, &[header("user-agent", "test")]);

        // Block should be non-empty
        assert!(!block.is_empty());

        // 4 pseudo-headers in Chrome order (m,s,a,p) followed by 1 regular header
        let expected = [
            (":method", "GET"),
            (":scheme", "https"),
            (":authority", "example.com"),
            (":path", "/"),
            ("user-agent", "test"),
        ];
        assert_eq!(headers.len(), expected.len());
        for (actual, wanted) in headers.iter().zip(expected.iter()) {
            assert_eq!(actual.0, wanted.0);
            assert_eq!(actual.1, wanted.1);
        }
    }

    #[test]
    fn test_encoder_standard_order() {
        let mut encoder = HpackEncoder::new(PseudoHeaderOrder::Standard);
        let (_, headers) = round_trip(&mut encoder, &[]);

        // Standard/legacy order: m,a,s,p
        let names: Vec<&str> = headers
            .iter()
            .take(4)
            .map(|(name, _)| name.as_str())
            .collect();
        assert_eq!(names, [":method", ":authority", ":scheme", ":path"]);
    }

    #[test]
    fn test_encoder_filters_connection_headers() {
        let mut encoder = HpackEncoder::chrome();
        let input = [
            header("connection", "keep-alive"),
            header("keep-alive", "timeout=5"),
            header("user-agent", "test"),
        ];
        let (_, headers) = round_trip(&mut encoder, &input);

        // Only the pseudo-headers and user-agent survive; connection headers are filtered
        assert_eq!(headers.len(), 5);
        assert_eq!(headers[4].0, "user-agent");
    }
}