Skip to main content

nexus_web/http/
chunked.rs

1//! Chunked Transfer-Encoding decoder (sans-IO).
2//!
3//! Strips chunk framing from HTTP/1.1 chunked responses.
4//! Feed wire bytes in, get decoded body bytes out.
5
6use super::error::HttpError;
7
/// Position of the decoder within the chunked wire format.
///
/// The decoder advances `ChunkSize` → `ChunkData` → `ChunkDataTrailer` and
/// back to `ChunkSize` for every chunk; a zero-sized chunk moves it to
/// `Trailers` and finally `Done`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// Collecting the hexadecimal chunk-size line, up to its terminating `\n`.
    ChunkSize,
    /// Copying chunk payload; `remaining` is the number of payload bytes
    /// of the current chunk that have not been copied yet.
    ChunkData { remaining: usize },
    /// Expecting the line ending that follows a chunk's payload.
    ChunkDataTrailer,
    /// Skipping optional trailer headers that follow the zero-sized chunk.
    /// RFC 7230 §4.1: the trailer section ends with an empty line (`\r\n`).
    /// `prev_was_lf` is true when the most recent byte, not counting `\r`,
    /// was `\n`.
    Trailers { prev_was_lf: bool },
    /// The zero-sized chunk and the trailer section have been consumed.
    Done,
}
24
25/// Sans-IO chunked transfer-encoding decoder.
26///
27/// Feed wire bytes via [`decode`](Self::decode). Decoded body bytes are written
28/// into the output buffer. Returns how many input bytes were consumed
29/// and how many output bytes were produced.
30///
31/// # Usage
32///
33/// ```ignore
34/// let mut decoder = ChunkedDecoder::new();
35/// let (consumed, produced) = decoder.decode(wire_bytes, &mut output_buf)?;
36/// ```
37pub struct ChunkedDecoder {
38    state: State,
39    /// Accumulates the hex chunk size digits.
40    size_buf: [u8; 16],
41    size_len: usize,
42    /// Total decoded body bytes so far.
43    total_decoded: usize,
44}
45
46impl ChunkedDecoder {
47    /// Create a new decoder.
48    #[must_use]
49    pub fn new() -> Self {
50        Self {
51            state: State::ChunkSize,
52            size_buf: [0; 16],
53            size_len: 0,
54            total_decoded: 0,
55        }
56    }
57
58    /// Whether the final zero-length chunk has been seen.
59    pub fn is_done(&self) -> bool {
60        self.state == State::Done
61    }
62
63    /// Total decoded body bytes produced so far.
64    pub fn total_decoded(&self) -> usize {
65        self.total_decoded
66    }
67
68    /// Decode chunked wire bytes into body bytes.
69    ///
70    /// Returns `(consumed, produced)` — how many input bytes were consumed
71    /// and how many output bytes were written.
72    ///
73    /// Call repeatedly as more wire bytes arrive. When `is_done()` returns
74    /// true, the body is complete.
75    pub fn decode(&mut self, input: &[u8], output: &mut [u8]) -> Result<(usize, usize), HttpError> {
76        let mut in_pos = 0;
77        let mut out_pos = 0;
78
79        while in_pos < input.len() && self.state != State::Done {
80            match self.state {
81                State::ChunkSize => {
82                    // Scan for \n to find end of chunk size line.
83                    let b = input[in_pos];
84                    in_pos += 1;
85
86                    if b == b'\n' {
87                        // Parse the hex size (ignore optional chunk extensions after ';')
88                        let size_str = std::str::from_utf8(&self.size_buf[..self.size_len])
89                            .map_err(|_| HttpError::Malformed("invalid UTF-8 in chunk size"))?;
90                        let hex_part = size_str.split(';').next().unwrap_or("").trim();
91                        let chunk_size = usize::from_str_radix(hex_part, 16)
92                            .map_err(|_| HttpError::Malformed("invalid hex in chunk size"))?;
93
94                        self.size_len = 0;
95
96                        if chunk_size == 0 {
97                            // Zero chunk = end of body. Consume optional
98                            // trailer headers + terminating empty line.
99                            self.state = State::Trailers { prev_was_lf: true };
100                        } else {
101                            self.state = State::ChunkData {
102                                remaining: chunk_size,
103                            };
104                        }
105                    } else if b == b'\r' {
106                        // Skip CR before LF.
107                    } else {
108                        if self.size_len >= self.size_buf.len() {
109                            return Err(HttpError::Malformed("chunk size line too long"));
110                        }
111                        self.size_buf[self.size_len] = b;
112                        self.size_len += 1;
113                    }
114                }
115
116                State::ChunkData { remaining } => {
117                    // Copy chunk data to output.
118                    let available_in = input.len() - in_pos;
119                    let available_out = output.len() - out_pos;
120                    let to_copy = remaining.min(available_in).min(available_out);
121
122                    if to_copy == 0 {
123                        // Output buffer full — caller needs to process and call again.
124                        break;
125                    }
126
127                    output[out_pos..out_pos + to_copy]
128                        .copy_from_slice(&input[in_pos..in_pos + to_copy]);
129                    in_pos += to_copy;
130                    out_pos += to_copy;
131                    self.total_decoded += to_copy;
132
133                    let new_remaining = remaining - to_copy;
134                    if new_remaining == 0 {
135                        self.state = State::ChunkDataTrailer;
136                    } else {
137                        self.state = State::ChunkData {
138                            remaining: new_remaining,
139                        };
140                    }
141                }
142
143                State::ChunkDataTrailer => {
144                    // Consume \r\n after chunk data → next chunk.
145                    let b = input[in_pos];
146                    in_pos += 1;
147                    if b == b'\n' {
148                        self.state = State::ChunkSize;
149                    }
150                }
151
152                State::Trailers { prev_was_lf } => {
153                    // Consume optional trailer headers until empty line.
154                    // Empty line = \n immediately after \n (with optional
155                    // \r between). Detects both \r\n\r\n and \n\n.
156                    let b = input[in_pos];
157                    in_pos += 1;
158                    if b == b'\n' {
159                        if prev_was_lf {
160                            // \n\n — empty line, done.
161                            self.state = State::Done;
162                        } else {
163                            self.state = State::Trailers { prev_was_lf: true };
164                        }
165                    } else if b == b'\r' {
166                        // \r doesn't reset the \n flag — \n\r\n is valid.
167                        self.state = State::Trailers { prev_was_lf };
168                    } else {
169                        // Non-CRLF byte — part of a trailer header line.
170                        self.state = State::Trailers { prev_was_lf: false };
171                    }
172                }
173
174                State::Done => break,
175            }
176        }
177
178        Ok((in_pos, out_pos))
179    }
180
181    /// Reset for reuse.
182    pub fn reset(&mut self) {
183        self.state = State::ChunkSize;
184        self.size_len = 0;
185        self.total_decoded = 0;
186    }
187}
188
189impl Default for ChunkedDecoder {
190    fn default() -> Self {
191        Self::new()
192    }
193}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh decoder over `wire` in a single call with a 64-byte
    /// output buffer. Returns the decoder, the number of input bytes it
    /// consumed, and the decoded body.
    fn decode_once(wire: &[u8]) -> (ChunkedDecoder, usize, Vec<u8>) {
        let mut decoder = ChunkedDecoder::new();
        let mut scratch = [0u8; 64];
        let (consumed, produced) = decoder.decode(wire, &mut scratch).unwrap();
        (decoder, consumed, scratch[..produced].to_vec())
    }

    #[test]
    fn single_chunk() {
        let wire = b"d\r\nHello, world!\r\n0\r\n\r\n";
        let (decoder, consumed, body) = decode_once(wire);

        assert_eq!(consumed, wire.len());
        assert_eq!(body.len(), 13);
        assert_eq!(&body[..], b"Hello, world!");
        assert!(decoder.is_done());
    }

    #[test]
    fn multiple_chunks() {
        let wire = b"7\r\nMozilla\r\n11\r\nDeveloper Network\r\n0\r\n\r\n";
        let (decoder, consumed, body) = decode_once(wire);

        assert_eq!(consumed, wire.len());
        assert_eq!(body.len(), 24);
        assert_eq!(&body[..], b"MozillaDeveloper Network");
        assert!(decoder.is_done());
    }

    #[test]
    fn byte_by_byte() {
        let wire = b"5\r\nhello\r\n0\r\n\r\n";
        let mut decoder = ChunkedDecoder::new();
        let mut collected = [0u8; 64];
        let mut filled = 0;

        // Feed the decoder one wire byte per call.
        for byte in wire.iter().copied() {
            let (_, produced) = decoder.decode(&[byte], &mut collected[filled..]).unwrap();
            filled += produced;
        }

        assert_eq!(filled, 5);
        assert_eq!(&collected[..5], b"hello");
        assert!(decoder.is_done());
    }

    #[test]
    fn hex_uppercase() {
        let (decoder, _, body) = decode_once(b"A\r\n0123456789\r\n0\r\n\r\n");

        assert_eq!(body.len(), 10);
        assert!(decoder.is_done());
    }

    #[test]
    fn chunk_extension_ignored() {
        // Per RFC 7230, whatever follows ';' on the size line is ignored.
        let (decoder, _, body) = decode_once(b"5;ext=val\r\nhello\r\n0\r\n\r\n");

        assert_eq!(body.len(), 5);
        assert_eq!(&body[..5], b"hello");
        assert!(decoder.is_done());
    }

    #[test]
    fn empty_body() {
        let wire = b"0\r\n\r\n";
        let (decoder, consumed, body) = decode_once(wire);

        assert_eq!(consumed, wire.len());
        assert_eq!(body.len(), 0);
        assert!(decoder.is_done());
    }

    #[test]
    fn output_buffer_smaller_than_chunk() {
        let wire = b"a\r\n0123456789\r\n0\r\n\r\n";
        let mut decoder = ChunkedDecoder::new();
        let mut window = [0u8; 4]; // deliberately smaller than the chunk
        let mut offset = 0;

        // Each call can emit at most four bytes; resume from where the
        // previous call stopped consuming.
        let expected_pieces: [&[u8]; 3] = [b"0123", b"4567", b"89"];
        for expected in expected_pieces.iter() {
            let (consumed, produced) = decoder.decode(&wire[offset..], &mut window).unwrap();
            assert_eq!(produced, expected.len());
            assert_eq!(&window[..produced], *expected);
            offset += consumed;
        }

        assert!(decoder.is_done());
    }

    #[test]
    fn malformed_hex_rejected() {
        let mut decoder = ChunkedDecoder::new();
        let mut scratch = [0u8; 64];

        let outcome = decoder.decode(b"xyz\r\ndata\r\n", &mut scratch);
        assert!(outcome.is_err());
    }

    #[test]
    fn trailer_headers_consumed() {
        let wire = b"5\r\nhello\r\n0\r\nTrailer: value\r\nAnother: hdr\r\n\r\n";
        let (decoder, consumed, body) = decode_once(wire);

        assert_eq!(consumed, wire.len());
        assert_eq!(body.len(), 5);
        assert_eq!(&body[..5], b"hello");
        assert!(decoder.is_done());
    }

    #[test]
    fn trailer_no_headers_just_crlf() {
        // No trailer headers at all, only the standard terminator.
        let wire = b"3\r\nabc\r\n0\r\n\r\n";
        let (decoder, consumed, body) = decode_once(wire);

        assert_eq!(consumed, wire.len());
        assert_eq!(body.len(), 3);
        assert!(decoder.is_done());
    }
}