1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
use crate::Chunk;

enum ParseState {
    Size,
    Content(usize),
}

/// A single ChunkParser instance used to parse
/// multiple Chunks one after the other
pub struct ChunkParser {
    state: ParseState,
    head: Vec<u8>,
    body: Vec<u8>,
}

/// The maximum chunk size allowed by this parser,
/// this helps prevent attacks that try to send a very
/// large size and cause the server to run out of
/// memory
const MAX_CHUNK_SIZE: usize = 64 * 2usize.pow(20); // 64 Mebibyte

impl ChunkParser {
    /// Creates a new empty Instance of the ChunkParser
    /// that is ready to start parsing Request
    pub fn new() -> ChunkParser {
        Self {
            state: ParseState::Size,
            head: Vec::with_capacity(16),
            body: Vec::new(),
        }
    }

    /// Clears and resets the internal State to allow
    /// the parser to accept, receive and parse a new
    /// chunk without using up extra allocations,
    pub fn clear(&mut self) {
        // Clear the internal buffer
        self.head.clear();
        self.body.clear();

        // Reset the internal state
        self.state = ParseState::Size;
    }

    /// Parses and handles each individual byte
    fn parse_size(&mut self) -> Option<usize> {
        match self.head.last() {
            Some(byte) if *byte != b'\n' => return None,
            None => return None,
            _ => {}
        };

        self.head.pop();
        self.head.pop();

        let head_str = match std::str::from_utf8(&self.head) {
            Ok(t) => t,
            Err(_) => {
                return None;
            }
        };

        let result = match usize::from_str_radix(head_str, 16) {
            Ok(n) => n,
            Err(_) => {
                return None;
            }
        };

        // Safety check to prevent large Chunk sizes from allocating too much memory
        if result > MAX_CHUNK_SIZE {
            return None;
        }

        Some(result)
    }

    /// Parses the given Block of data,
    /// returns the size it parsed as well
    /// as if it is done with parsing
    ///
    /// Returns:
    /// * If it is done and the `finish` function should be called
    /// * The amount of data that is still left in the Buffer (at the end)
    pub fn block_parse(&mut self, data: &[u8]) -> (bool, usize) {
        match self.state {
            ParseState::Size => {
                for (index, tmp) in data.iter().enumerate() {
                    self.head.push(*tmp);
                    if let Some(n_size) = self.parse_size() {
                        let n_state = ParseState::Content(n_size);
                        self.state = n_state;
                        self.body.reserve(n_size);
                        return self.block_parse(&data[index + 1..]);
                    }
                }
                (false, 0)
            }
            ParseState::Content(size) => {
                let body_length = self.body.len();
                let left_to_read = size - body_length;

                let data_length = data.len();
                let read_size = std::cmp::min(left_to_read, data_length);

                self.body.extend_from_slice(&data[..read_size]);
                (
                    self.body.len() >= size,
                    data_length.saturating_sub(read_size + 2),
                )
            }
        }
    }

    /// Finishes the Parsing and returns the
    /// finsihed Chunk
    pub fn finish(&mut self) -> Option<Chunk> {
        let size = match self.state {
            ParseState::Size => return None,
            ParseState::Content(s) => s,
        };

        let body = std::mem::take(&mut self.body);
        Some(Chunk::new(size, body))
    }
}

impl Default for ChunkParser {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_valid_chunk() {
        let content = "9\r\nDeveloper\r\n".as_bytes();

        let mut parser = ChunkParser::new();
        assert_eq!((true, 0), parser.block_parse(&content));

        assert_eq!(
            Some(Chunk::new(9, "Developer".as_bytes().to_vec())),
            parser.finish()
        );
    }
    #[test]
    fn parse_zero_sized_chunk() {
        let content = "0\r\n\r\n".as_bytes();

        let mut parser = ChunkParser::new();
        assert_eq!((true, 0), parser.block_parse(&content));

        assert_eq!(Some(Chunk::new(0, "".as_bytes().to_vec())), parser.finish());
    }

    #[test]
    fn parse_valid_chunk_that_contains_other() {
        let content = "9\r\nDeveloper\r\n0\r\n\r\n".as_bytes();

        let mut parser = ChunkParser::new();
        assert_eq!((true, 5), parser.block_parse(&content));

        assert_eq!(
            Some(Chunk::new(9, "Developer".as_bytes().to_vec())),
            parser.finish()
        );
    }

    #[test]
    fn parse_valid_multiple_chunks() {
        let mut parser = ChunkParser::new();
        assert_eq!((false, 0), parser.block_parse(&"9\r\nDevel".as_bytes()));
        assert_eq!((true, 0), parser.block_parse(&"oper\r\n".as_bytes()));

        assert_eq!(
            Some(Chunk::new(9, "Developer".as_bytes().to_vec())),
            parser.finish()
        );
    }
}