loona_hpack/
encoder.rs

1//! Implements all functionality related to encoding header blocks using
2//! HPACK.
3//!
4//! Clients should use the `Encoder` struct as the API for performing HPACK
5//! encoding.
6//!
7//! # Examples
8//!
9//! Encodes a header using a literal encoding.
10//!
11//! ```rust
12//! use loona_hpack::Encoder;
13//!
14//! let mut encoder = Encoder::new();
15//!
16//! let headers = vec![
17//!     (&b"custom-key"[..], &b"custom-value"[..]),
18//! ];
19//! // First encoding...
20//! let result = encoder.encode(headers);
21//! // The result is a literal encoding of the header name and value, with an
22//! // initial byte representing the type of the encoding
23//! // (incremental indexing).
24//! assert_eq!(
25//!     vec![0x40,
26//!          10, b'c', b'u', b's', b't', b'o', b'm', b'-', b'k', b'e', b'y',
27//!          12, b'c', b'u', b's', b't', b'o', b'm', b'-', b'v', b'a', b'l',
28//!          b'u', b'e'],
29//!     result);
30//! ```
31//!
32//! Encodes some pseudo-headers that are already found in the static table.
33//!
34//! ```rust
35//! use loona_hpack::Encoder;
36//!
37//! let mut encoder = Encoder::new();
38//! let headers = vec![
39//!     (&b":method"[..], &b"GET"[..]),
40//!     (&b":path"[..], &b"/"[..]),
41//! ];
42//!
43//! // The headers are encoded by providing their index (with a bit flag
44//! // indicating that the indexed representation is used).
45//! assert_eq!(encoder.encode(headers), vec![2 | 0x80, 4 | 0x80]);
46//! ```
47use std::io;
48use std::num::Wrapping;
49
50use super::HeaderTable;
51use super::STATIC_TABLE;
52
/// Writes `value` into the provided `io::Write` instance using the HPACK
/// prefix-integer representation.
///
/// The integer occupies the `prefix_size` lowest-order bits of the first
/// octet, while the remaining high-order bits of that octet are taken from
/// `leading_bits`. Any bits of `leading_bits` that fall inside the prefix are
/// cleared before use, so only its first `8 - prefix_size` bits are reflected
/// in the output.
///
/// A value strictly smaller than the largest number the prefix can hold is
/// emitted in that single octet. Otherwise the prefix is filled with ones and
/// the remainder follows in groups of seven bits, least significant group
/// first, with the high bit of an octet set whenever another octet follows.
///
/// # Errors
///
/// Propagates any error returned by `writer.write_all`.
///
/// # Example
///
/// ```rust
/// use loona_hpack::encoder::encode_integer_into;
///
/// {
///     // No bits specified in the 3 most significant bits of the first octet
///     let mut vec = Vec::new();
///     encode_integer_into(10, 5, 0, &mut vec).unwrap();
///     assert_eq!(vec, vec![10]);
/// }
/// {
///     // The most significant bit should be set; i.e. the 3 most significant
///     // bits are 100.
///     let mut vec = Vec::new();
///     encode_integer_into(10, 5, 0x80, &mut vec).unwrap();
///     assert_eq!(vec, vec![0x8A]);
/// }
/// {
///     // The leading bits have a bit set within the last `prefix_size`
///     // bits -- it is ignored by the function.
///     let mut vec = Vec::new();
///     encode_integer_into(10, 5, 0x10, &mut vec).unwrap();
///     assert_eq!(vec, vec![0x0A]);
/// }
/// {
///     // Too large for a 5-bit prefix: continuation octets follow.
///     let mut vec = Vec::new();
///     encode_integer_into(1337, 5, 0, &mut vec).unwrap();
///     assert_eq!(vec, vec![31, 154, 10]);
/// }
/// ```
pub fn encode_integer_into<W: io::Write>(
    mut value: usize,
    prefix_size: u8,
    leading_bits: u8,
    writer: &mut W,
) -> io::Result<()> {
    // All-ones pattern covering the prefix. A prefix of 8 bits or more spans
    // the whole octet.
    let Wrapping(prefix_max) = match prefix_size {
        0..=7 => Wrapping(1u8 << prefix_size) - Wrapping(1),
        _ => Wrapping(0xFF),
    };
    // Drop whatever the caller set inside the prefix; leaving it in place
    // would corrupt the encoded integer.
    let flags = leading_bits & !prefix_max;
    let limit = usize::from(prefix_max);

    if value < limit {
        // Fits in the prefix: a single octet is all that is needed.
        return writer.write_all(&[flags | value as u8]);
    }

    // Saturate the prefix, then emit the remainder seven bits at a time.
    writer.write_all(&[flags | prefix_max])?;
    value -= limit;
    loop {
        let low_bits = (value & 0x7F) as u8;
        value >>= 7;
        if value == 0 {
            // Final octet: continuation bit left clear.
            return writer.write_all(&[low_bits]);
        }
        writer.write_all(&[low_bits | 0x80])?;
    }
}
123
124/// Encode an integer to the representation defined by HPACK.
125///
126/// Returns a newly allocated `Vec` containing the encoded bytes.
127/// Only `prefix_size` lowest-order bits of the first byte in the
128/// array are guaranteed to be used.
129pub fn encode_integer(value: usize, prefix_size: u8) -> Vec<u8> {
130    let mut res = Vec::new();
131    encode_integer_into(value, prefix_size, 0, &mut res).unwrap();
132    res
133}
134
135/// Represents an HPACK encoder. Allows clients to encode arbitrary header sets
136/// and tracks the encoding context. That is, encoding subsequent header sets
137/// will use the context built by previous encode calls.
138///
139/// This is the main API for performing HPACK encoding of headers.
140///
141/// # Examples
142///
143/// Encoding a header two times in a row produces two different
144/// representations, due to the utilization of HPACK compression.
145///
146/// ```rust
147/// use loona_hpack::Encoder;
148///
149/// let mut encoder = Encoder::new();
150///
151/// let headers = vec![
152///     (b"custom-key".to_vec(), b"custom-value".to_vec()),
153/// ];
154/// // First encoding...
155/// let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));
156/// // The result is a literal encoding of the header name and value, with an
157/// // initial byte representing the type of the encoding
158/// // (incremental indexing).
159/// assert_eq!(
160///     vec![0x40,
161///          10, b'c', b'u', b's', b't', b'o', b'm', b'-', b'k', b'e', b'y',
162///          12, b'c', b'u', b's', b't', b'o', b'm', b'-', b'v', b'a', b'l',
163///          b'u', b'e'],
164///     result);
165///
166/// // Encode the same headers again!
167/// let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));
168/// // The result is simply the index of the header in the header table (62),
169/// // with a flag representing that the decoder should use the index.
170/// assert_eq!(vec![0x80 | 62], result);
171/// ```
172pub struct Encoder<'a> {
173    /// The header table represents the encoder's context
174    header_table: HeaderTable<'a>,
175}
176
177impl<'a> Default for Encoder<'a> {
178    fn default() -> Self {
179        Self::new()
180    }
181}
182
183impl<'a> Encoder<'a> {
184    /// Creates a new `Encoder` with a default static table, as defined by the
185    /// HPACK spec (Appendix A).
186    pub fn new() -> Encoder<'a> {
187        Encoder {
188            header_table: HeaderTable::with_static_table(STATIC_TABLE),
189        }
190    }
191
192    /// Sets a new maximum dynamic table size for the encoder.
193    pub fn set_max_table_size(&mut self, new_max_size: usize) {
194        self.header_table
195            .dynamic_table
196            .set_max_table_size(new_max_size);
197    }
198
199    /// Encodes the given headers using the HPACK rules and returns a newly
200    /// allocated `Vec` containing the bytes representing the encoded header
201    /// set.
202    ///
203    /// The encoder so far supports only a single, extremely simple encoding
204    /// strategy, whereby each header is represented as an indexed header if
205    /// already found in the header table and a literal otherwise. When a
206    /// header isn't found in the table, it is added if the header name wasn't
207    /// found either (i.e. there are never two header names with different
208    /// values in the produced header table). Strings are always encoded as
209    /// literals (Huffman encoding is not used).
210    pub fn encode<'b, I>(&mut self, headers: I) -> Vec<u8>
211    where
212        I: IntoIterator<Item = (&'b [u8], &'b [u8])>,
213    {
214        let mut encoded: Vec<u8> = Vec::new();
215        self.encode_into(headers, &mut encoded).unwrap();
216        encoded
217    }
218
219    /// Encodes the given headers into the given `io::Write` instance. If the
220    /// io::Write raises an Error at any point, this error is propagated
221    /// out. Any changes to the internal state of the encoder will not be
222    /// rolled back, though, so care should be taken to ensure that the paired
223    /// decoder also ends up seeing the same state updates or that their pairing
224    /// is cancelled.
225    pub fn encode_into<'b, I, W>(&mut self, headers: I, writer: &mut W) -> io::Result<()>
226    where
227        I: IntoIterator<Item = (&'b [u8], &'b [u8])>,
228        W: io::Write,
229    {
230        for header in headers {
231            self.encode_header_into(header, writer)?;
232        }
233        Ok(())
234    }
235
236    /// Encodes a single given header into the given `io::Write` instance.
237    ///
238    /// Any errors are propagated, similarly to the `encode_into` method, and it
239    /// is the callers responsiblity to make sure that the paired encoder
240    /// sees them too.
241    pub fn encode_header_into<W: io::Write>(
242        &mut self,
243        header: (&[u8], &[u8]),
244        writer: &mut W,
245    ) -> io::Result<()> {
246        match self.header_table.find_header(header) {
247            None => {
248                // The name of the header is in no tables: need to encode
249                // it with both a literal name and value.
250                self.encode_literal(&header, true, writer)?;
251                self.header_table
252                    .add_header(header.0.to_vec(), header.1.to_vec());
253            }
254            Some((index, false)) => {
255                // The name of the header is at the given index, but the
256                // value does not match the current one: need to encode
257                // only the value as a literal.
258                self.encode_indexed_name((index, header.1), false, writer)?;
259            }
260            Some((index, true)) => {
261                // The full header was found in one of the tables, so we
262                // just encode the index.
263                self.encode_indexed(index, writer)?;
264            }
265        };
266        Ok(())
267    }
268
269    /// Encodes a header as a literal (i.e. both the name and the value are
270    /// encoded as a string literal) and places the result in the given buffer
271    /// `buf`.
272    ///
273    /// # Parameters
274    ///
275    /// - `header` - the header to be encoded
276    /// - `should_index` - indicates whether the given header should be indexed,
277    ///   i.e. inserted into the dynamic table
278    /// - `buf` - The buffer into which the result is placed
279    fn encode_literal<W: io::Write>(
280        &mut self,
281        header: &(&[u8], &[u8]),
282        should_index: bool,
283        buf: &mut W,
284    ) -> io::Result<()> {
285        let mask = if should_index { 0x40 } else { 0x0 };
286
287        buf.write_all(&[mask])?;
288        self.encode_string_literal(header.0, buf)?;
289        self.encode_string_literal(header.1, buf)?;
290        Ok(())
291    }
292
293    /// Encodes a string literal and places the result in the given buffer
294    /// `buf`.
295    ///
296    /// The function does not consider Huffman encoding for now, but always
297    /// produces a string literal representations, according to the HPACK spec
298    /// section 5.2.
299    fn encode_string_literal<W: io::Write>(
300        &mut self,
301        octet_str: &[u8],
302        buf: &mut W,
303    ) -> io::Result<()> {
304        encode_integer_into(octet_str.len(), 7, 0, buf)?;
305        buf.write_all(octet_str)?;
306        Ok(())
307    }
308
309    /// Encodes a header whose name is indexed and places the result in the
310    /// given buffer `buf`.
311    fn encode_indexed_name<W: io::Write>(
312        &mut self,
313        header: (usize, &[u8]),
314        should_index: bool,
315        buf: &mut W,
316    ) -> io::Result<()> {
317        let (mask, prefix) = if should_index { (0x40, 6) } else { (0x0, 4) };
318
319        encode_integer_into(header.0, prefix, mask, buf)?;
320        // So far, we rely on just one strategy for encoding string literals.
321        self.encode_string_literal(header.1, buf)?;
322        Ok(())
323    }
324
325    /// Encodes an indexed header (a header that is fully in the header table)
326    /// and places the result in the given buffer `buf`.
327    ///
328    /// The encoding is according to the rules of the HPACK spec, section 6.1.
329    fn encode_indexed<W: io::Write>(&self, index: usize, buf: &mut W) -> io::Result<()> {
330        // We need to set the most significant bit, since the bit-pattern is
331        // `1xxxxxxx` for indexed headers.
332        encode_integer_into(index, 7, 0x80, buf)?;
333        Ok(())
334    }
335}
336
#[cfg(test)]
mod tests {
    use tracing::debug;

    use super::encode_integer;
    use super::Encoder;

    use super::super::Decoder;

    /// Checks the integer encoding for values that fit in the prefix as well
    /// as values that need continuation octets.
    #[test]
    fn test_encode_integer() {
        assert_eq!(encode_integer(10, 5), [10]);
        assert_eq!(encode_integer(1337, 5), [31, 154, 10]);
        assert_eq!(encode_integer(127, 7), [127, 0]);
        assert_eq!(encode_integer(255, 8), [255, 0]);
        assert_eq!(encode_integer(254, 8), [254]);
        assert_eq!(encode_integer(1, 8), [1]);
        assert_eq!(encode_integer(0, 8), [0]);
        assert_eq!(encode_integer(255, 7), [127, 128, 1]);
    }

    /// A helper function that checks whether the given buffer can be decoded
    /// into a set of headers that corresponds to the given `headers` list.
    /// Relies on using the `loona_hpack::decoder::Decoder` struct for
    /// performing the decoding.
    ///
    /// # Returns
    ///
    /// A `bool` indicating whether such a decoding can be performed.
    fn is_decodable(buf: &[u8], headers: &[(Vec<u8>, Vec<u8>)]) -> bool {
        let mut decoder = Decoder::new();
        decoder
            .decode(buf)
            .ok()
            .map_or(false, |decoded| decoded == *headers)
    }

    /// Tests that encoding only the `:method` header works.
    #[test]
    fn test_encode_only_method() {
        let mut encoder: Encoder = Encoder::new();
        let headers = vec![(b":method".to_vec(), b"GET".to_vec())];

        let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

        debug!("{:?}", result);
        assert!(is_decodable(&result, &headers));
    }

    /// Tests that when a single custom header is sent it gets indexed by the
    /// coder.
    #[test]
    fn test_custom_header_gets_indexed() {
        let mut encoder: Encoder = Encoder::new();
        let headers = vec![(b"custom-key".to_vec(), b"custom-value".to_vec())];

        let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));
        assert!(is_decodable(&result, &headers));
        // The header is in the encoder's dynamic table.
        assert_eq!(encoder.header_table.dynamic_table.to_vec(), headers);
        // ...but also indicated as such in the output.
        assert_eq!(0x40 & result[0], 0x40);
        debug!("{:?}", result);
    }

    /// Tests that when a header gets added to the dynamic table, the encoder
    /// will use the index, instead of the literal representation on the next
    /// encoding of the same header.
    #[test]
    fn test_uses_index_on_second_iteration() {
        let mut encoder: Encoder = Encoder::new();
        let headers = vec![(b"custom-key".to_vec(), b"custom-value".to_vec())];
        // First encoding...
        let _ = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

        // Encode the same headers again!
        let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

        // The header is in the encoder's dynamic table.
        assert_eq!(encoder.header_table.dynamic_table.to_vec(), headers);
        // The output is a single index byte?
        assert_eq!(result.len(), 1);
        // The index is correctly encoded:
        // - The most significant bit is set
        assert_eq!(0x80 & result[0], 0x80);
        // - The other 7 bits decode to an integer giving the index in the full header
        //   address space.
        assert_eq!(result[0] ^ 0x80, 62);
        // The header table actually contains the header at that index?
        assert_eq!(
            encoder.header_table.get_from_table(62).unwrap(),
            (&headers[0].0[..], &headers[0].1[..])
        );
    }

    /// Tests that when a header name is indexed, but the value isn't, the
    /// header is represented by an index (for the name) and a literal (for
    /// the value).
    #[test]
    fn test_name_indexed_value_not() {
        {
            let mut encoder: Encoder = Encoder::new();
            // `:method` is in the static table, but only for GET and POST
            let headers = [(b":method", b"PUT")];

            let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

            // The first byte represents the index in the header table: last
            // occurrence of `:method` is at index 3.
            assert_eq!(result[0], 3);
            // The rest of it correctly represents PUT?
            assert_eq!(&result[1..], &[3, b'P', b'U', b'T']);
        }
        {
            let mut encoder: Encoder = Encoder::new();
            // `:authority` is found by name in the table, but not paired with
            // this value.
            let headers = [(b":authority".to_vec(), b"example.com".to_vec())];
            let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

            // The first byte is the index of the `:authority` name.
            assert_eq!(result[0], 1);
            // The rest of it correctly represents the literal `example.com`?
            assert_eq!(
                &result[1..],
                &[11, b'e', b'x', b'a', b'm', b'p', b'l', b'e', b'.', b'c', b'o', b'm']
            )
        }
    }

    /// Tests that multiple headers are correctly encoded (i.e. can be decoded
    /// back to their original representation).
    #[test]
    fn test_multiple_headers_encoded() {
        let mut encoder = Encoder::new();
        let headers = vec![
            (b"custom-key".to_vec(), b"custom-value".to_vec()),
            (b":method".to_vec(), b"GET".to_vec()),
            (b":path".to_vec(), b"/some/path".to_vec()),
        ];

        let result = encoder.encode(headers.iter().map(|h| (&h.0[..], &h.1[..])));

        assert!(is_decodable(&result, &headers));
    }
}