stream_httparse/streaming_parser/
chunk_parser.rs

1use crate::Chunk;
2
/// The part of a chunk the parser is currently waiting for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ParseState {
    /// The size line that opens a chunk has not been parsed yet.
    Size,
    /// The size line has been parsed; the payload is the announced
    /// length of the chunk body in bytes.
    Content(usize),
}

/// A single ChunkParser instance that can be reused to parse
/// several chunks one after the other.
#[derive(Debug)]
pub struct ChunkParser {
    /// Which part of the current chunk is expected next.
    state: ParseState,
    /// Raw bytes collected for the size line of the current chunk.
    head: Vec<u8>,
    /// Body bytes collected for the current chunk.
    body: Vec<u8>,
}
15
/// Upper bound for the size a single chunk may announce.
///
/// Size lines announcing more than this are refused, so a peer cannot
/// make the parser reserve an arbitrarily large body buffer and
/// exhaust the memory of the server.
const MAX_CHUNK_SIZE: usize = 64 << 20; // 64 MiB
21
22impl ChunkParser {
23    /// Creates a new empty Instance of the ChunkParser
24    /// that is ready to start parsing Request
25    pub fn new() -> ChunkParser {
26        Self {
27            state: ParseState::Size,
28            head: Vec::with_capacity(16),
29            body: Vec::new(),
30        }
31    }
32
33    /// Clears and resets the internal State to allow
34    /// the parser to accept, receive and parse a new
35    /// chunk without using up extra allocations,
36    pub fn clear(&mut self) {
37        // Clear the internal buffer
38        self.head.clear();
39        self.body.clear();
40
41        // Reset the internal state
42        self.state = ParseState::Size;
43    }
44
45    /// Parses and handles each individual byte
46    fn parse_size(&mut self) -> Option<usize> {
47        match self.head.last() {
48            Some(byte) if *byte != b'\n' => return None,
49            None => return None,
50            _ => {}
51        };
52
53        self.head.pop();
54        self.head.pop();
55
56        let head_str = match std::str::from_utf8(&self.head) {
57            Ok(t) => t,
58            Err(_) => {
59                return None;
60            }
61        };
62
63        let result = match usize::from_str_radix(head_str, 16) {
64            Ok(n) => n,
65            Err(_) => {
66                return None;
67            }
68        };
69
70        // Safety check to prevent large Chunk sizes from allocating too much memory
71        if result > MAX_CHUNK_SIZE {
72            return None;
73        }
74
75        Some(result)
76    }
77
78    /// Parses the given Block of data,
79    /// returns the size it parsed as well
80    /// as if it is done with parsing
81    ///
82    /// Returns:
83    /// * If it is done and the `finish` function should be called
84    /// * The amount of data that is still left in the Buffer (at the end)
85    pub fn block_parse(&mut self, data: &[u8]) -> (bool, usize) {
86        match self.state {
87            ParseState::Size => {
88                for (index, tmp) in data.iter().enumerate() {
89                    self.head.push(*tmp);
90                    if let Some(n_size) = self.parse_size() {
91                        let n_state = ParseState::Content(n_size);
92                        self.state = n_state;
93                        self.body.reserve(n_size);
94                        return self.block_parse(&data[index + 1..]);
95                    }
96                }
97                (false, 0)
98            }
99            ParseState::Content(size) => {
100                let body_length = self.body.len();
101                let left_to_read = size - body_length;
102
103                let data_length = data.len();
104                let read_size = std::cmp::min(left_to_read, data_length);
105
106                self.body.extend_from_slice(&data[..read_size]);
107                (
108                    self.body.len() >= size,
109                    data_length.saturating_sub(read_size + 2),
110                )
111            }
112        }
113    }
114
115    /// Finishes the Parsing and returns the
116    /// finsihed Chunk
117    pub fn finish(&mut self) -> Option<Chunk> {
118        let size = match self.state {
119            ParseState::Size => return None,
120            ParseState::Content(s) => s,
121        };
122
123        let body = std::mem::take(&mut self.body);
124        Some(Chunk::new(size, body))
125    }
126}
127
128impl Default for ChunkParser {
129    fn default() -> Self {
130        Self::new()
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete chunk delivered in a single block
    #[test]
    fn parse_valid_chunk() {
        let mut parser = ChunkParser::new();

        let outcome = parser.block_parse(b"9\r\nDeveloper\r\n");
        assert_eq!((true, 0), outcome);

        let expected = Chunk::new(9, b"Developer".to_vec());
        assert_eq!(Some(expected), parser.finish());
    }

    /// The terminating chunk with a size of zero
    #[test]
    fn parse_zero_sized_chunk() {
        let mut parser = ChunkParser::new();

        let outcome = parser.block_parse(b"0\r\n\r\n");
        assert_eq!((true, 0), outcome);

        let expected = Chunk::new(0, Vec::new());
        assert_eq!(Some(expected), parser.finish());
    }

    /// A complete chunk followed by the start of the next one, the
    /// bytes of the following chunk are reported as left over
    #[test]
    fn parse_valid_chunk_that_contains_other() {
        let mut parser = ChunkParser::new();

        let outcome = parser.block_parse(b"9\r\nDeveloper\r\n0\r\n\r\n");
        assert_eq!((true, 5), outcome);

        let expected = Chunk::new(9, b"Developer".to_vec());
        assert_eq!(Some(expected), parser.finish());
    }

    /// A single chunk whose body is spread over two blocks
    #[test]
    fn parse_valid_multiple_chunks() {
        let mut parser = ChunkParser::new();

        let first = parser.block_parse(b"9\r\nDevel");
        assert_eq!((false, 0), first);

        let second = parser.block_parse(b"oper\r\n");
        assert_eq!((true, 0), second);

        let expected = Chunk::new(9, b"Developer".to_vec());
        assert_eq!(Some(expected), parser.finish());
    }
}