webparse/
helper.rs

1// Copyright 2022 - 2023 Wenmeng See the COPYRIGHT
2// file at the top-level directory of this distribution.
3// 
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8// 
9// Author: tickbh
10// -----
11// Created Date: 2023/08/15 11:30:53
12
13
14use algorithm::buf::{Bt, BtMut, BinaryRef};
15use crate::{WebResult, WebError, byte_map, next, expect, peek, HttpError, StatusCode};
16use super::{Method, Version, HeaderMap, HeaderName, HeaderValue, Scheme};
17
18
/// Stateless namespace type for low-level HTTP parsing helpers.
///
/// Every helper in this file is an associated function or constant on
/// `Helper`; none of them takes `self`.
pub struct Helper;
20
21impl Helper {
22    
23    /// Determines if byte is a token char.
24    ///
25    /// > ```notrust
26    /// > token          = 1*tchar
27    /// >
28    /// > tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
29    /// >                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
30    /// >                / DIGIT / ALPHA
31    /// >                ; any VCHAR, except delimiters
32    /// > ```
33    #[inline]
34    pub fn is_token(b: u8) -> bool {
35        b > 0x1F && b < 0x7F && b != b' '
36    }
37
38
39    #[inline]
40    pub fn is_status_token(b: u8) -> bool {
41        b > 0x1F && b < 0x7F
42    }
43    
44    #[inline]
45    pub fn is_alpha(b: u8) -> bool {
46        if b >= 65 && b <= 90 {
47            return true
48        } else if b >= 97 && b <= 122 {
49            return true
50        } else {
51            return false
52        }
53    }
54
55    pub const DIGIT_0 :u8 = 48;
56
57    #[inline]
58    pub fn is_digit(b: u8) -> bool {
59        if b >= 48 && b <= 57 {
60            return true
61        } else {
62            return false
63        }
64    }
65
66    #[inline]
67    pub fn is_hex(b: u8) -> bool {
68        if b >= 48 && b <= 57 {
69            return true
70        } else if b >= 65 && b <= 70 {
71            return true
72        } else if b >= 97 && b <= 102 {
73            return true
74        } else {
75            return false
76        }
77    }
78
    /// Returns the uppercase ASCII hex digit for the nibble value `b`, looked
    /// up in `HEX_MAP` (e.g. `10` maps to `b'A'`).
    ///
    /// # Panics
    ///
    /// Panics if `b > 15`, because `HEX_MAP` only has 16 entries.
    pub fn to_hex(b: u8) -> u8 {
        Self::HEX_MAP[b as usize]
    }
82
83    #[inline]
84    pub fn convert_hex(b: u8) -> Option<u8> {
85        if b >= 48 && b <= 57 {
86            return Some(b - 48)
87        } else if b >= 65 && b <= 70 {
88            return Some(b - 65 + 10)
89        } else if b >= 97 && b <= 102 {
90            return Some(b - 97 + 10)
91        } else {
92            return None;
93        }
94    }
95
96    const HEX_MAP: [u8; 16] = [b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', 
97                                b'9', b'A', b'B', b'C', b'D', b'E', b'F'];
98
    // Lookup table, indexed by byte value, of the bytes accepted inside a URI
    // string (see `is_uri_token`).
    //
    // As written, the table accepts every printable ASCII byte 0x21..=0x7E
    // except `<` and `>`. Control bytes, space, DEL and all bytes >= 0x80 are
    // rejected. That is a wider set than "A-Z a-z 0-9 !#$%&'*+-._();:@=,/?[]~^":
    // `"`, `\`, `` ` ``, `{`, `|` and `}` are accepted too.
    //
    // `byte_map!` is defined elsewhere in the crate; presumably it turns the
    // 0/1 entries into `false`/`true`.
    const URI_MAP: [bool; 256] = byte_map![
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    //  \0                            \n
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    //  control characters
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //  SP !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    //  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //  @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //  `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    //  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  del
    //   ====== Extended ASCII (aka. obs-text) ======
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ];
128
    /// Reports whether `b` is accepted inside a URI string, according to the
    /// `URI_MAP` lookup table.
    #[inline]
    pub fn is_uri_token(b: u8) -> bool {
        Self::URI_MAP[b as usize]
    }
133    
    // Lookup table, indexed by byte value, behind `is_not_uritrans`.
    //
    // As written, the accepted bytes are: A-Z a-z 0-9 and & - . / : = ? @ _
    // Everything else (controls, space, other punctuation, DEL, >= 0x80) is 0.
    //
    // NOTE(review): an earlier note described this set as "A-Z a-z 0-9 &:?/-._~".
    // The table itself does NOT accept `~` (0x7E) but DOES accept `=` and `@`.
    // Confirm which of the two reflects the intended behavior.
    //
    // `byte_map!` is defined elsewhere in the crate; presumably it turns the
    // 0/1 entries into `false`/`true`.
    const URITRANS_MAP: [bool; 256] = byte_map![
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    //  \0                            \n
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    //  control characters
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,
    //  SP !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1,
    //  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //  @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    //  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    //  `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    //  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  del
    //   ====== Extended ASCII (aka. obs-text) ======
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ];
163
    /// Returns the `URITRANS_MAP` entry for `b`.
    ///
    /// Judging by the name, `true` presumably means the byte can appear in a
    /// URI as-is without being translated (escaped) — confirm against callers.
    #[inline]
    pub(crate) fn is_not_uritrans(b: u8) -> bool {
        Self::URITRANS_MAP[b as usize]
    }
168
    // Lookup table, indexed by byte value, of the bytes accepted in a header
    // name (see `is_header_name_token`).
    //
    // Accepted: A-Z a-z 0-9 and ! # $ % & ' * + - . ^ _ ` | ~
    // (the same characters as the RFC 7230 `tchar` rule). Everything else,
    // including space, `:` and all bytes >= 0x80, is rejected.
    //
    // Each row covers 16 consecutive byte values, starting at 0x00.
    const HEADER_NAME_MAP: [bool; 256] = byte_map![
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    ];
187
    /// Reports whether `b` is accepted inside a header name, according to the
    /// `HEADER_NAME_MAP` lookup table.
    #[inline]
    pub(crate) fn is_header_name_token(b: u8) -> bool {
        Self::HEADER_NAME_MAP[b as usize]
    }
192
    // Lookup table, indexed by byte value, of the bytes accepted in a header
    // value (see `is_header_value_token`).
    //
    // Every byte is accepted except `\r` (0x0D, the single 0 in the first
    // row), so a header value runs until the next carriage return.
    //
    // NOTE(review): `\n` (0x0A), NUL and other control bytes are accepted, so
    // a header line terminated by a bare `\n` does not end the value here even
    // though `skip_new_line` and `parse_header` have arms for a lone `\n`.
    // Confirm whether this is intended.
    const HEADER_VALUE_MAP: [bool; 256] = byte_map![
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    ];
211
212
    /// Reports whether `b` is accepted inside a header value, according to the
    /// `HEADER_VALUE_MAP` lookup table (every byte except `\r`).
    #[inline]
    pub(crate) fn is_header_value_token(b: u8) -> bool {
        Self::HEADER_VALUE_MAP[b as usize]
    }
217
218    pub(crate) fn parse_method<B:Bt>(buffer: &mut B) -> WebResult<Method> {
219        let token = Self::parse_token(buffer)?;
220        TryFrom::try_from(token)
221    }
222
223    pub(crate) fn parse_status<B:Bt>(buffer: &mut B) -> WebResult<StatusCode> {
224        let token = Self::parse_token(buffer)?;
225        let status = StatusCode::try_from(token);
226
227
228        status
229    }
230
231    pub(crate) fn parse_version<B:Bt>(buffer: &mut B) -> WebResult<Version> {
232        let token = Self::parse_token(buffer)?;
233        match token {
234            Version::SHTTP10 => Ok(Version::Http10),
235            Version::SHTTP11 => Ok(Version::Http11),
236            Version::SHTTP2 => Ok(Version::Http2),
237            Version::SHTTP3 => Ok(Version::Http3),
238            _ => {
239                Err(WebError::from(HttpError::Version))
240            }
241        }
242    }
243
244    
    /// Consumes the longest prefix of `buffer` whose bytes all satisfy `func`
    /// and returns it as a string slice.
    ///
    /// * `func`  - predicate deciding whether a byte belongs to the token.
    /// * `err`   - error returned when the token is empty and `empty` is `false`.
    /// * `empty` - whether an empty token is acceptable. When it is, exactly
    ///   one byte (the one that failed `func`) is consumed via `next!` and
    ///   `""` is returned.
    ///
    /// # Errors
    ///
    /// * `err` when no byte matches and `empty` is `false`.
    /// * Whatever `peek!` / `next!` yield through `?` — presumably a "partial
    ///   input" error when the chunk ends before a non-matching byte is seen;
    ///   confirm against the macro definitions.
    #[inline]
    pub(crate) fn parse_token_by_func_empty<'a, B: Bt>(buffer: &'a mut B, func: fn(u8)->bool, err: WebError, empty: bool) -> WebResult<&'a str> {
        // First pass: measure the token on a separate cursor over the current
        // chunk, so `buffer` itself is not advanced while scanning.
        let position = {
            let mut postion = 0;
            let mut cur = BinaryRef::from(buffer.chunk());
            loop {
                // Stop at the first byte rejected by `func`; that byte is not
                // counted as part of the token.
                if !func(peek!(cur)?) {
                    break;
                }
                next!(cur)?;
                postion += 1;
            }
            postion
        };
        if position == 0 {
            if empty {
                // Empty token allowed: skip the rejected byte and return "".
                next!(buffer)?;
                return Ok("");
            }
            return Err(err);
        } else {
            // `advance_chunk(position)` presumably advances `buffer` and hands
            // back the `position` bytes that were skipped — TODO confirm.
            //
            // NOTE(review): `from_utf8_unchecked` requires valid UTF-8, but
            // nothing here checks it. Predicates restricted to ASCII are fine;
            // `is_header_value_token` also accepts bytes >= 0x80, so the
            // returned `&str` may hold invalid UTF-8 in that case.
            let val = unsafe {
                std::str::from_utf8_unchecked(&buffer.advance_chunk(position))
            };
            return Ok(val);
        }

    }
273
    /// Consumes a non-empty run of bytes accepted by `func` from `buffer`.
    ///
    /// Shorthand for `parse_token_by_func_empty` with `empty = false`: when
    /// the very first byte is rejected by `func`, `err` is returned.
    #[inline]
    pub(crate) fn parse_token_by_func<'a, B: Bt>(buffer: &'a mut B, func: fn(u8)->bool, err: WebError) -> WebResult<&'a str> {
        Self::parse_token_by_func_empty(buffer, func, err, false)
    }
278
    /// Consumes a non-empty run of hexadecimal digits (see `is_hex`) from
    /// `buffer` and returns it as text.
    ///
    /// Returns `HttpError::Token` when the first byte is not a hex digit.
    #[inline]
    pub(crate) fn parse_hex<'a, B: Bt>(buffer: &'a mut B) -> WebResult<&'a str> {
        Self::parse_token_by_func(buffer, Self::is_hex, WebError::from(HttpError::Token))
    }
283
    /// Consumes a non-empty run of bytes accepted by `is_token` (printable,
    /// non-space ASCII) from `buffer` and returns it as text.
    ///
    /// Returns `HttpError::Token` when the first byte is not accepted.
    #[inline]
    pub(crate) fn parse_token<'a, B:Bt>(buffer: &'a mut B) -> WebResult<&'a str> {
        Self::parse_token_by_func(buffer, Self::is_token, WebError::from(HttpError::Token))
    }
288
    /// Consumes a non-empty run of bytes accepted by `is_status_token`
    /// (printable ASCII, space included) from `buffer` and returns it as text.
    ///
    /// Returns `HttpError::Token` when the first byte is not accepted.
    #[inline]
    pub(crate) fn parse_status_token<'a, B:Bt>(buffer: &'a mut B) -> WebResult<&'a str> {
        Self::parse_token_by_func(buffer, Self::is_status_token, WebError::from(HttpError::Token))
    }
293
294    #[inline]
295    pub(crate) fn parse_header_name<'a, B:Bt>(buffer: &'a mut B) -> WebResult<HeaderName> {
296        let token = Self::parse_token_by_func(buffer, Self::is_header_name_token, WebError::from(HttpError::HeaderName))?;
297        match HeaderName::from_bytes(token.as_bytes()) {
298            Some(name) => Ok(name),
299            _ => Err(WebError::from(HttpError::from(HttpError::HeaderName)))
300        }
301    }
302
303    #[inline]
304    pub(crate) fn parse_header_value<'a, B:Bt>(buffer: &'a mut B) -> WebResult<HeaderValue> {
305        let token = Self::parse_token_by_func_empty(buffer, Self::is_header_value_token, WebError::from(HttpError::HeaderValue), true)?;
306        Ok(HeaderValue::Value(token.as_bytes().to_vec()))
307    }
308
309    #[inline]
310    pub(crate) fn parse_scheme<'a, B:Bt>(buffer: &'a mut B) -> WebResult<&'a str> {
311        let token = Self::parse_token_by_func(buffer, Scheme::is_scheme_token, WebError::from(HttpError::HeaderValue))?;
312        Ok(token)
313    }
314
    /// Consumes one line terminator from `buffer`.
    ///
    /// Accepted forms are `\r\n`, a lone `\n`, or a single space.
    ///
    /// # Errors
    ///
    /// * `HttpError::NewLine` when `\r` is not followed by `\n`.
    /// * `HttpError::Partial` when the next byte is anything else.
    /// * Whatever `next!` yields through `?` — presumably an error for an
    ///   exhausted buffer; confirm against the macro definition.
    #[inline]
    pub fn skip_new_line<B:Bt>(buffer: &mut B) -> WebResult<()> {
        match next!(buffer)? {
            b'\r' => {
                // `expect!` is defined elsewhere; here it is asked to produce
                // the `NewLine` error unless the following byte is `\n`.
                expect!(buffer.next() == b'\n' => Err(WebError::from(HttpError::NewLine)));
            },
            b'\n' => {
            },
            // NOTE(review): a single space is treated as a line terminator
            // here; the reason is not visible in this file — confirm.
            b' ' => {
            },
            _ => return Err(WebError::from(HttpError::Partial))
        };
        Ok(())
    }
329
330    #[inline]
331    pub(crate) fn skip_empty_lines<B: Bt>(buffer: &mut B) -> WebResult<()> {
332        loop {
333            let b = buffer.peek();
334            match b {
335                Some(b'\r') => {
336                    next!(buffer)?;
337                    expect!(buffer.next() == b'\n' => Err(WebError::from(HttpError::NewLine)));
338                }
339                Some(b'\n') => {
340                    next!(buffer)?;
341                }
342                Some(..) => {
343                    return Ok(());
344                }
345                None => return Err(WebError::from(HttpError::Partial)),
346            }
347        }
348    }
349
350    #[inline]
351    pub(crate) fn skip_spaces<B:Bt>(buffer: &mut B) -> WebResult<()> {
352        loop {
353            let b = buffer.peek();
354            match b {
355                Some(b' ') => {
356                    next!(buffer)?;
357                }
358                Some(..) => {
359                    return Ok(());
360                }
361                None => return Err(WebError::from(HttpError::Partial)),
362            }
363        }
364    }
365    
366    #[inline]
367    pub(crate) fn parse_header<B:Bt>(buffer: &mut B, header: &mut HeaderMap) -> WebResult<()> {
368        header.clear();
369
370        loop {
371            let b = peek!(buffer)?;
372            if b == b'\r' {
373                buffer.get_next();
374                expect!(buffer.next() == b'\n' => Err(WebError::from(HttpError::NewLine)));
375                return Ok(());
376            }
377            if b == b'\n' {
378                buffer.get_next();
379                return Ok(());
380            }
381
382            let name = Helper::parse_header_name(buffer)?;
383            Self::skip_spaces(buffer)?;
384            expect!(buffer.next() == b':' => Err(WebError::from(HttpError::HeaderName)));
385            Self::skip_spaces(buffer)?;
386            let value = Helper::parse_header_value(buffer)?;
387            Self::skip_new_line(buffer)?;
388            header.insert(name, value);
389        }
390    }
391
392    pub fn parse_chunk_data<'a, B:Bt>(buffer: &'a mut B) -> WebResult<(usize, usize)> {
393        let len = buffer.remaining();
394        let mut val = BinaryRef::from(buffer.chunk());
395        let num = Helper::parse_hex(&mut val)?;
396        let num = usize::from_str_radix(num, 16).unwrap();
397        Helper::skip_new_line(&mut val)?;
398        if num + 2 > val.remaining() {
399            return Err(WebError::Http(HttpError::Partial));
400        }
401        return Ok((len - val.remaining(), num));
402
403        // let ret = buffer.chunk()[..num].to_vec();
404        // buffer.advance(num);
405        // Helper::skip_new_line(buffer)?;
406        // Ok((ret, buffer.mark_commit() - first, num == 0))
407    }
408
409    pub fn encode_chunk_data<B:Bt+BtMut>(buffer: &mut B, data: &[u8]) -> std::io::Result<usize> {
410        let len_str = format!("{:x}", data.len());
411        let mut size = buffer.put_slice(len_str.as_bytes());
412        size += buffer.put_slice("\r\n".as_bytes());
413        size += buffer.put_slice(data);
414        size += buffer.put_slice("\r\n".as_bytes());
415        Ok(size)
416    }
417
418    #[inline]
419    pub fn hex_to_vec(s: &str) -> Vec<u8> {
420        let mut result = vec![];
421        let bytes = s.as_bytes();
422        let mut val = 0;
423        let mut is_first = true;
424        for b in bytes {
425            if b != &b' ' {
426                if is_first {
427                    val = u8::from_str_radix(std::str::from_utf8(&[*b]).unwrap(), 16).unwrap();
428                    is_first = false
429                } else {
430                    val = val * 16 + u8::from_str_radix(std::str::from_utf8(&[*b]).unwrap(), 16).unwrap();
431                    result.push(val);
432                    val = 0;
433                    is_first = true;
434                }
435            }
436        }
437        result
438    }
439
440
441    pub fn eq_bytes_ignore_ascii_case(a: &[u8], b: &[u8]) -> bool {
442        if a.len() != b.len() {
443            return false;
444        }
445        for i in 0..a.len() {
446            if a[i] == b[i] {
447                continue;
448            }
449            let wrap = a[i].wrapping_sub(b[i]);
450            if wrap != 32 && wrap != 224 {
451                return false;
452            }
453        }
454        true
455    }
456    
457    pub fn eq_bytes(a: &[u8], b: &[u8]) -> bool {
458        if a.len() != b.len() {
459            return false;
460        }
461        a == b
462    }
463    
464    pub fn contains_bytes(a: &[u8], b: &[u8]) -> bool {
465        if a.len() < b.len() {
466            return false;
467        }
468        for i in 0..(a.len() - b.len() + 1) {
469            if &a[i..(i + b.len())] == b {
470                return true;
471            }
472        }
473        false
474    }
475}