io_http/1.1/coroutines/
chunked-transfer-coding.rs

//! I/O-free coroutine decoding the HTTP/1.1 chunked transfer coding.
//!
//! Reference: https://datatracker.ietf.org/doc/html/rfc2616#section-3.6.1
2
3use std::mem;
4
5use io_stream::{coroutines::Read, Io};
6use memchr::memmem;
7
/// Carriage return byte.
const CR: u8 = b'\r';

/// Line feed byte.
const LF: u8 = b'\n';

/// Line terminator used by the chunked framing.
const CRLF: [u8; 2] = [CR, LF];

/// Two consecutive line terminators, searched for to detect the end
/// of the trailer.
const CRLF_CRLF: [u8; 4] = [CR, LF, CR, LF];
13
/// Decoding phase of the chunked transfer coding coroutine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum State {
    /// Waiting for a complete chunk header line (hexadecimal chunk
    /// size, optional chunk extension, CRLF).
    ChunkSize,

    /// Extracting chunk data.
    ///
    /// The value is the number of bytes still to be extracted,
    /// including the CRLF that terminates the chunk data.
    // TODO: use ReadExact from io-stream
    ChunkData(usize),

    /// A chunk of size 0 has been seen: waiting for the end of the
    /// trailer.
    Trailer,
}
21
22#[derive(Debug)]
23pub struct ChunkedTransferCoding {
24    read: Read,
25    state: State,
26    buffer: Vec<u8>,
27    body: Vec<u8>,
28}
29
30impl ChunkedTransferCoding {
31    pub fn new(read: impl Into<Read>) -> Self {
32        Self {
33            read: read.into(),
34            state: State::ChunkSize,
35            buffer: Vec::new(),
36            body: Vec::new(),
37        }
38    }
39
40    pub fn extend(&mut self, bytes: impl IntoIterator<Item = u8>) {
41        self.buffer.extend(bytes);
42    }
43
44    pub fn resume(&mut self, mut input: Option<Io>) -> Result<Vec<u8>, Io> {
45        loop {
46            match &mut self.state {
47                State::ChunkSize => {
48                    // chunk = chunk-size [ chunk-extension ] CRLF
49                    //         chunk-data CRLF
50
51                    // find chunk CRLF, otherwise read bytes
52                    let Some(crlf) = memmem::find(&self.buffer, &CRLF) else {
53                        let output = self.read.resume(input.take())?;
54                        self.buffer.extend(output.bytes());
55                        self.read.replace(output.buffer);
56                        continue;
57                    };
58
59                    // search for potential chunk extension
60                    let ext = memchr::memchr(b';', &self.buffer[..crlf]).unwrap_or(crlf);
61
62                    // extract chunk size
63                    let chunk_size = String::from_utf8_lossy(&self.buffer[..ext]);
64                    let Ok(chunk_size) = usize::from_str_radix(&chunk_size, 16) else {
65                        return Err(Io::Error(format!("invalid chunk size: {chunk_size}")));
66                    };
67
68                    // if chunk size is 0, search for trailer
69                    if chunk_size == 0 {
70                        // drain till CRLF excluded, so we can easily
71                        // look for a double CRLF CRLF afterwards
72                        self.buffer.drain(..crlf);
73                        self.state = State::Trailer;
74                        continue;
75                    }
76
77                    // drain till CRLF included
78                    self.buffer.drain(..crlf + CRLF.len());
79
80                    // search for chunk data, including the CRLF
81                    self.state = State::ChunkData(chunk_size + CRLF.len());
82                }
83                State::ChunkData(0) => {
84                    // no more data to extract, remove last CRLF from
85                    // the extracted data then search back for chunk
86                    // size
87                    self.body.drain(self.body.len() - CRLF.len()..);
88                    self.state = State::ChunkSize;
89                }
90                State::ChunkData(_) if self.buffer.is_empty() => {
91                    // empty buffer, read bytes
92                    let output = self.read.resume(input.take())?;
93                    self.buffer.extend(output.bytes());
94                    self.read.replace(output.buffer);
95                }
96                State::ChunkData(size) => {
97                    // extract data from buffer, decrease chunk size
98                    let min_size = self.buffer.len().min(*size);
99                    self.body.extend(self.buffer.drain(..min_size));
100                    *size -= min_size;
101                }
102                State::Trailer => {
103                    // a double CRLF CRLF means the end of trailer
104                    let Some(0) = memmem::rfind(&self.buffer, &CRLF_CRLF) else {
105                        let output = self.read.resume(input.take())?;
106                        self.buffer.extend(output.bytes());
107                        self.read.replace(output.buffer);
108                        continue;
109                    };
110
111                    break Ok(mem::take(&mut self.body));
112                }
113            }
114        }
115    }
116}
117
#[cfg(test)]
mod tests {
    use std::io::{BufReader, Read as _};

    use io_stream::{coroutines::Read, Io, Output};

    use super::ChunkedTransferCoding;

    /// Runs the decoder over `encoded`, serving every read request
    /// from an in-memory reader, then checks that the decoded body
    /// equals `decoded`.
    fn assert_decodes(encoded: &str, decoded: &str) {
        let mut source = BufReader::new(encoded.as_bytes());
        let mut coroutine = ChunkedTransferCoding::new(Read::default());
        let mut pending: Option<Io> = None;

        let body = loop {
            // anything but a final body or a read request is a bug
            let mut buffer = match coroutine.resume(pending.take()) {
                Ok(body) => break body,
                Err(Io::Read(Err(buffer))) => buffer,
                Err(io) => unreachable!("unexpected I/O: {io:?}"),
            };

            let bytes_count = source.read(&mut buffer).unwrap();

            pending = Some(Io::Read(Ok(Output {
                buffer,
                bytes_count,
            })));
        };

        assert_eq!(body, decoded.as_bytes());
    }

    /// Test case from the Russian Wikipedia page:
    ///
    /// https://ru.wikipedia.org/wiki/Chunked_transfer_encoding
    #[test]
    fn wiki_ru() {
        let encoded = concat!(
            "9\r\n",
            "chunk 1, \r\n",
            "7\r\n",
            "chunk 2\r\n",
            "0\r\n",
            "\r\n",
        );

        assert_decodes(encoded, "chunk 1, chunk 2");
    }

    /// Test case from the French Wikipedia page:
    ///
    /// https://fr.wikipedia.org/wiki/Chunked_transfer_encoding
    #[test]
    fn wiki_fr() {
        let encoded = concat!(
            "27\r\n",
            "Voici les données du premier morceau\r\n\r\n",
            "1C\r\n",
            "et voici un second morceau\r\n\r\n",
            "20\r\n",
            "et voici deux derniers morceaux \r\n",
            "12\r\n",
            "sans saut de ligne\r\n",
            "0\r\n",
            "\r\n",
        );

        let decoded = concat!(
            "Voici les données du premier morceau\r\n",
            "et voici un second morceau\r\n",
            "et voici deux derniers morceaux ",
            "sans saut de ligne",
        );

        assert_decodes(encoded, decoded);
    }

    /// Test case from the GitHub repository
    /// frewsxcv/rust-chunked-transfer:
    ///
    /// https://github.com/frewsxcv/rust-chunked-transfer/blob/main/src/decoder.rs
    #[test]
    fn github_frewsxcv() {
        let encoded = "3\r\nhel\r\nb\r\nlo world!!!\r\n0\r\n\r\n";
        let decoded = "hello world!!!";

        assert_decodes(encoded, decoded);
    }
}