fluke_hpack/
encoder.rs

1//! Implements all functionality related to encoding header blocks using
2//! HPACK.
3//!
4//! Clients should use the `Encoder` struct as the API for performing HPACK
5//! encoding.
6//!
7//! # Examples
8//!
9//! Encodes a header using a literal encoding.
10//!
11//! ```rust
12//! use fluke_hpack::Encoder;
13//!
14//! let mut encoder = Encoder::new();
15//!
16//! let headers = vec![
17//!     (&b"custom-key"[..], &b"custom-value"[..]),
18//! ];
19//! // First encoding...
20//! let result = encoder.encode(headers);
21//! // The result is a literal encoding of the header name and value, with an
22//! // initial byte representing the type of the encoding
23//! // (incremental indexing).
24//! assert_eq!(
25//!     vec![0x40,
26//!          10, b'c', b'u', b's', b't', b'o', b'm', b'-', b'k', b'e', b'y',
27//!          12, b'c', b'u', b's', b't', b'o', b'm', b'-', b'v', b'a', b'l',
28//!          b'u', b'e'],
29//!     result);
30//! ```
31//!
32//! Encodes some pseudo-headers that are already found in the static table.
33//!
34//! ```rust
35//! use fluke_hpack::Encoder;
36//!
37//! let mut encoder = Encoder::new();
38//! let headers = vec![
39//!     (&b":method"[..], &b"GET"[..]),
40//!     (&b":path"[..], &b"/"[..]),
41//! ];
42//!
43//! // The headers are encoded by providing their index (with a bit flag
44//! // indicating that the indexed representation is used).
45//! assert_eq!(encoder.encode(headers), vec![2 | 0x80, 4 | 0x80]);
46//! ```
47use std::io;
48use std::num::Wrapping;
49
50use super::HeaderTable;
51use super::STATIC_TABLE;
52
/// Encodes an integer using the HPACK integer representation and writes it into the
/// provided `io::Write` instance.
///
/// The caller may also specify the leading bits of the first octet. Only the
/// `8 - prefix_size` most significant bits of `leading_bits` are reflected in the first
/// octet emitted by the function: any bits that are set within the lowest `prefix_size`
/// bits are cleared and overwritten by the integer's representation.
///
/// A `prefix_size` of 8 or more is treated as a full 8-bit prefix, in which case none of
/// the `leading_bits` are kept.
///
/// All octets of the encoded integer are handed to `writer` in a single `write_all`
/// call, so an unbuffered writer is not hit once per octet.
///
/// # Errors
///
/// Returns any error reported by `writer`.
///
/// # Example
///
/// ```rust
/// use fluke_hpack::encoder::encode_integer_into;
///
/// {
///     // No bits specified in the 3 most significant bits of the first octet
///     let mut vec = Vec::new();
///     encode_integer_into(10, 5, 0, &mut vec).unwrap();
///     assert_eq!(vec, vec![10]);
/// }
/// {
///     // The most significant bit should be set; i.e. the 3 most significant
///     // bits are 100.
///     let mut vec = Vec::new();
///     encode_integer_into(10, 5, 0x80, &mut vec).unwrap();
///     assert_eq!(vec, vec![0x8A]);
/// }
/// {
///     // The leading bits have a bit set within the last `prefix_size`
///     // bits -- it is ignored by the function.
///     let mut vec = Vec::new();
///     encode_integer_into(10, 5, 0x10, &mut vec).unwrap();
///     assert_eq!(vec, vec![0x0A]);
/// }
/// {
///     // A value that does not fit into the prefix spills into continuation octets.
///     let mut vec = Vec::new();
///     encode_integer_into(1337, 5, 0, &mut vec).unwrap();
///     assert_eq!(vec, vec![31, 154, 10]);
/// }
/// ```
pub fn encode_integer_into<W: io::Write>(
    mut value: usize,
    prefix_size: u8,
    leading_bits: u8,
    writer: &mut W,
) -> io::Result<()> {
    // One prefix octet plus enough 7-bit groups to hold every bit of a `usize`.
    const MAX_ENCODED_LEN: usize = 1 + (usize::BITS as usize + 6) / 7;

    // All-ones mask covering the `prefix_size` low-order bits of the first octet.
    // The shift cannot overflow because `prefix_size < 8` on that branch.
    let mask: u8 = if prefix_size >= 8 {
        u8::MAX
    } else {
        (1u8 << prefix_size) - 1
    };
    // Clear any bits within the last `prefix_size` bits of the provided `leading_bits`.
    // Failing to do so might lead to an incorrect encoding of the integer.
    let leading_bits = leading_bits & !mask;
    let prefix_max = usize::from(mask);

    if value < prefix_max {
        // The value fits into the prefix: a single octet is enough. The cast is
        // lossless because `value < prefix_max <= 255`.
        return writer.write_all(&[leading_bits | value as u8]);
    }

    // The prefix is saturated (all ones); the remainder follows in 7-bit groups,
    // least significant group first, with the high bit flagging "more octets follow".
    let mut encoded = [0u8; MAX_ENCODED_LEN];
    encoded[0] = leading_bits | mask;
    let mut len = 1;

    value -= prefix_max;
    while value >= 128 {
        encoded[len] = (value % 128) as u8 | 0x80;
        len += 1;
        value /= 128;
    }
    // Final octet: high bit clear, terminating the integer.
    encoded[len] = value as u8;
    len += 1;

    writer.write_all(&encoded[..len])
}
121
122/// Encode an integer to the representation defined by HPACK.
123///
124/// Returns a newly allocated `Vec` containing the encoded bytes.
125/// Only `prefix_size` lowest-order bits of the first byte in the
126/// array are guaranteed to be used.
127pub fn encode_integer(value: usize, prefix_size: u8) -> Vec<u8> {
128    let mut res = Vec::new();
129    encode_integer_into(value, prefix_size, 0, &mut res).unwrap();
130    res
131}
132
133/// Represents an HPACK encoder. Allows clients to encode arbitrary header sets
134/// and tracks the encoding context. That is, encoding subsequent header sets
135/// will use the context built by previous encode calls.
136///
137/// This is the main API for performing HPACK encoding of headers.
138///
139/// # Examples
140///
141/// Encoding a header two times in a row produces two different
142/// representations, due to the utilization of HPACK compression.
143///
144/// ```rust
145/// use fluke_hpack::Encoder;
146///
147/// let mut encoder = Encoder::new();
148///
149/// let headers = vec![
150///     (b"custom-key".to_vec(), b"custom-value".to_vec()),
151/// ];
152/// // First encoding...
153/// let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));
154/// // The result is a literal encoding of the header name and value, with an
155/// // initial byte representing the type of the encoding
156/// // (incremental indexing).
157/// assert_eq!(
158///     vec![0x40,
159///          10, b'c', b'u', b's', b't', b'o', b'm', b'-', b'k', b'e', b'y',
160///          12, b'c', b'u', b's', b't', b'o', b'm', b'-', b'v', b'a', b'l',
161///          b'u', b'e'],
162///     result);
163///
164/// // Encode the same headers again!
165/// let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));
166/// // The result is simply the index of the header in the header table (62),
167/// // with a flag representing that the decoder should use the index.
168/// assert_eq!(vec![0x80 | 62], result);
169/// ```
170pub struct Encoder<'a> {
171    /// The header table represents the encoder's context
172    header_table: HeaderTable<'a>,
173}
174
175impl<'a> Default for Encoder<'a> {
176    fn default() -> Self {
177        Self::new()
178    }
179}
180
181impl<'a> Encoder<'a> {
182    /// Creates a new `Encoder` with a default static table, as defined by the
183    /// HPACK spec (Appendix A).
184    pub fn new() -> Encoder<'a> {
185        Encoder {
186            header_table: HeaderTable::with_static_table(STATIC_TABLE),
187        }
188    }
189
190    /// Sets a new maximum dynamic table size for the encoder.
191    pub fn set_max_table_size(&mut self, new_max_size: usize) {
192        self.header_table
193            .dynamic_table
194            .set_max_table_size(new_max_size);
195    }
196
197    /// Encodes the given headers using the HPACK rules and returns a newly
198    /// allocated `Vec` containing the bytes representing the encoded header
199    /// set.
200    ///
201    /// The encoder so far supports only a single, extremely simple encoding
202    /// strategy, whereby each header is represented as an indexed header if
203    /// already found in the header table and a literal otherwise. When a
204    /// header isn't found in the table, it is added if the header name wasn't
205    /// found either (i.e. there are never two header names with different
206    /// values in the produced header table). Strings are always encoded as
207    /// literals (Huffman encoding is not used).
208    pub fn encode<'b, I>(&mut self, headers: I) -> Vec<u8>
209    where
210        I: IntoIterator<Item = (&'b [u8], &'b [u8])>,
211    {
212        let mut encoded: Vec<u8> = Vec::new();
213        self.encode_into(headers, &mut encoded).unwrap();
214        encoded
215    }
216
217    /// Encodes the given headers into the given `io::Write` instance. If the io::Write raises an
218    /// Error at any point, this error is propagated out. Any changes to the internal state of the
219    /// encoder will not be rolled back, though, so care should be taken to ensure that the paired
220    /// decoder also ends up seeing the same state updates or that their pairing is cancelled.
221    pub fn encode_into<'b, I, W>(&mut self, headers: I, writer: &mut W) -> io::Result<()>
222    where
223        I: IntoIterator<Item = (&'b [u8], &'b [u8])>,
224        W: io::Write,
225    {
226        for header in headers {
227            self.encode_header_into(header, writer)?;
228        }
229        Ok(())
230    }
231
232    /// Encodes a single given header into the given `io::Write` instance.
233    ///
234    /// Any errors are propagated, similarly to the `encode_into` method, and it is the callers
235    /// responsiblity to make sure that the paired encoder sees them too.
236    pub fn encode_header_into<W: io::Write>(
237        &mut self,
238        header: (&[u8], &[u8]),
239        writer: &mut W,
240    ) -> io::Result<()> {
241        match self.header_table.find_header(header) {
242            None => {
243                // The name of the header is in no tables: need to encode
244                // it with both a literal name and value.
245                self.encode_literal(&header, true, writer)?;
246                self.header_table
247                    .add_header(header.0.to_vec(), header.1.to_vec());
248            }
249            Some((index, false)) => {
250                // The name of the header is at the given index, but the
251                // value does not match the current one: need to encode
252                // only the value as a literal.
253                self.encode_indexed_name((index, header.1), false, writer)?;
254            }
255            Some((index, true)) => {
256                // The full header was found in one of the tables, so we
257                // just encode the index.
258                self.encode_indexed(index, writer)?;
259            }
260        };
261        Ok(())
262    }
263
264    /// Encodes a header as a literal (i.e. both the name and the value are
265    /// encoded as a string literal) and places the result in the given buffer
266    /// `buf`.
267    ///
268    /// # Parameters
269    ///
270    /// - `header` - the header to be encoded
271    /// - `should_index` - indicates whether the given header should be indexed, i.e.
272    ///                    inserted into the dynamic table
273    /// - `buf` - The buffer into which the result is placed
274    ///
275    fn encode_literal<W: io::Write>(
276        &mut self,
277        header: &(&[u8], &[u8]),
278        should_index: bool,
279        buf: &mut W,
280    ) -> io::Result<()> {
281        let mask = if should_index { 0x40 } else { 0x0 };
282
283        buf.write_all(&[mask])?;
284        self.encode_string_literal(header.0, buf)?;
285        self.encode_string_literal(header.1, buf)?;
286        Ok(())
287    }
288
289    /// Encodes a string literal and places the result in the given buffer
290    /// `buf`.
291    ///
292    /// The function does not consider Huffman encoding for now, but always
293    /// produces a string literal representations, according to the HPACK spec
294    /// section 5.2.
295    fn encode_string_literal<W: io::Write>(
296        &mut self,
297        octet_str: &[u8],
298        buf: &mut W,
299    ) -> io::Result<()> {
300        encode_integer_into(octet_str.len(), 7, 0, buf)?;
301        buf.write_all(octet_str)?;
302        Ok(())
303    }
304
305    /// Encodes a header whose name is indexed and places the result in the
306    /// given buffer `buf`.
307    fn encode_indexed_name<W: io::Write>(
308        &mut self,
309        header: (usize, &[u8]),
310        should_index: bool,
311        buf: &mut W,
312    ) -> io::Result<()> {
313        let (mask, prefix) = if should_index { (0x40, 6) } else { (0x0, 4) };
314
315        encode_integer_into(header.0, prefix, mask, buf)?;
316        // So far, we rely on just one strategy for encoding string literals.
317        self.encode_string_literal(header.1, buf)?;
318        Ok(())
319    }
320
321    /// Encodes an indexed header (a header that is fully in the header table)
322    /// and places the result in the given buffer `buf`.
323    ///
324    /// The encoding is according to the rules of the HPACK spec, section 6.1.
325    fn encode_indexed<W: io::Write>(&self, index: usize, buf: &mut W) -> io::Result<()> {
326        // We need to set the most significant bit, since the bit-pattern is
327        // `1xxxxxxx` for indexed headers.
328        encode_integer_into(index, 7, 0x80, buf)?;
329        Ok(())
330    }
331}
332
#[cfg(test)]
mod tests {
    use tracing::debug;

    use super::encode_integer;
    use super::Encoder;

    use super::super::Decoder;

    /// Borrows a list of owned headers as the `(&[u8], &[u8])` pairs that
    /// `Encoder::encode` expects.
    fn as_pairs(headers: &[(Vec<u8>, Vec<u8>)]) -> impl Iterator<Item = (&[u8], &[u8])> {
        headers.iter().map(|h| (h.0.as_slice(), h.1.as_slice()))
    }

    #[test]
    fn test_encode_integer() {
        assert_eq!(encode_integer(10, 5), [10]);
        assert_eq!(encode_integer(1337, 5), [31, 154, 10]);
        assert_eq!(encode_integer(127, 7), [127, 0]);
        assert_eq!(encode_integer(255, 8), [255, 0]);
        assert_eq!(encode_integer(254, 8), [254]);
        assert_eq!(encode_integer(1, 8), [1]);
        assert_eq!(encode_integer(0, 8), [0]);
        assert_eq!(encode_integer(255, 7), [127, 128, 1]);
    }

    /// A helper function that checks whether the given buffer can be decoded
    /// into a set of headers that corresponds to the given `headers` list.
    /// Relies on using the `fluke_hpack::decoder::Decoder` struct for
    /// performing the decoding.
    ///
    /// # Returns
    ///
    /// A `bool` indicating whether such a decoding can be performed; a
    /// decoding error counts as `false`.
    fn is_decodable(buf: &[u8], headers: &[(Vec<u8>, Vec<u8>)]) -> bool {
        let mut decoder = Decoder::new();
        matches!(decoder.decode(buf), Ok(decoded) if decoded == headers)
    }

    /// Tests that encoding only the `:method` header works.
    #[test]
    fn test_encode_only_method() {
        let mut encoder: Encoder = Encoder::new();
        let headers = vec![(b":method".to_vec(), b"GET".to_vec())];

        let result = encoder.encode(as_pairs(&headers));

        debug!("{:?}", result);
        assert!(is_decodable(&result, &headers));
    }

    /// Tests that when a single custom header is sent it gets indexed by the
    /// coder.
    #[test]
    fn test_custom_header_gets_indexed() {
        let mut encoder: Encoder = Encoder::new();
        let headers = vec![(b"custom-key".to_vec(), b"custom-value".to_vec())];

        let result = encoder.encode(as_pairs(&headers));
        assert!(is_decodable(&result, &headers));
        // The header is in the encoder's dynamic table.
        assert_eq!(encoder.header_table.dynamic_table.to_vec(), headers);
        // ...but also indicated as such in the output.
        assert_eq!(0x40 & result[0], 0x40);
        debug!("{:?}", result);
    }

    /// Tests that when a header gets added to the dynamic table, the encoder
    /// will use the index, instead of the literal representation on the next
    /// encoding of the same header.
    #[test]
    fn test_uses_index_on_second_iteration() {
        let mut encoder: Encoder = Encoder::new();
        let headers = vec![(b"custom-key".to_vec(), b"custom-value".to_vec())];
        // First encoding...
        let _ = encoder.encode(as_pairs(&headers));

        // Encode the same headers again!
        let result = encoder.encode(as_pairs(&headers));

        // The header is in the encoder's dynamic table.
        assert_eq!(encoder.header_table.dynamic_table.to_vec(), headers);
        // The output is a single index byte?
        assert_eq!(result.len(), 1);
        // The index is correctly encoded:
        // - The most significant bit is set
        assert_eq!(0x80 & result[0], 0x80);
        // - The other 7 bits decode to an integer giving the index in the full
        //   header address space.
        assert_eq!(result[0] ^ 0x80, 62);
        // The header table actually contains the header at that index?
        assert_eq!(
            encoder.header_table.get_from_table(62).unwrap(),
            (&headers[0].0[..], &headers[0].1[..])
        );
    }

    /// Tests that when a header name is indexed, but the value isn't, the
    /// header is represented by an index (for the name) and a literal (for
    /// the value).
    #[test]
    fn test_name_indexed_value_not() {
        {
            let mut encoder: Encoder = Encoder::new();
            // `:method` is in the static table, but only for GET and POST
            let headers = [(b":method", b"PUT")];

            let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

            // The first byte represents the index in the header table: last
            // occurrence of `:method` is at index 3.
            assert_eq!(result[0], 3);
            // The rest of it correctly represents PUT?
            assert_eq!(&result[1..], &[3, b'P', b'U', b'T']);
        }
        {
            let mut encoder: Encoder = Encoder::new();
            // This test expects the name `:authority` to be found at index 1,
            // with a value that does not match `example.com`.
            let headers = [(b":authority".to_vec(), b"example.com".to_vec())];
            let result = encoder.encode(as_pairs(&headers));

            // The first byte is the index of the `:authority` name.
            assert_eq!(result[0], 1);
            // The rest of it correctly represents `example.com`?
            assert_eq!(
                &result[1..],
                &[11, b'e', b'x', b'a', b'm', b'p', b'l', b'e', b'.', b'c', b'o', b'm']
            )
        }
    }

    /// Tests that multiple headers are correctly encoded (i.e. can be decoded
    /// back to their original representation).
    #[test]
    fn test_multiple_headers_encoded() {
        let mut encoder = Encoder::new();
        let headers = vec![
            (b"custom-key".to_vec(), b"custom-value".to_vec()),
            (b":method".to_vec(), b"GET".to_vec()),
            (b":path".to_vec(), b"/some/path".to_vec()),
        ];

        let result = encoder.encode(as_pairs(&headers));

        assert!(is_decodable(&result, &headers));
    }
}