Skip to main content

milo_parser/
parse.rs

1#![allow(clippy::not_unsafe_ptr_arg_deref)]
2
3extern crate alloc;
4
5use alloc::vec::Vec;
6use alloc::{boxed::Box, format};
7use core::cell::{Cell, RefCell};
8use core::ffi::{c_char, c_uchar, c_void};
9use core::fmt::Debug;
10use core::ptr;
11use core::str;
12use core::{slice, slice::from_raw_parts};
13use std::string;
14#[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
15use std::time::Instant;
16
17use milo_macros::*;
18
19use crate::Methods::CONNECT;
20use crate::matchers::*;
21use crate::*;
22
23impl Parser {
24  /// Parses a slice of characters.
25  ///
26  /// It returns the number of consumed characters.
27  pub fn parse(&mut self, input: *const c_uchar, limit: usize) -> usize {
28    // If the parser is paused, this is a no-op
29    if self.paused {
30      return 0;
31    }
32
33    let input = unsafe { from_raw_parts(input, limit) };
34
35    // Set the data to analyze, prepending unconsumed data from previous iteration
36    // if needed
37    let mut limit = limit;
38    let aggregate: Vec<c_uchar>;
39    let unconsumed_len = self.unconsumed_len;
40
41    let mut data = if self.manage_unconsumed && unconsumed_len > 0 {
42      unsafe {
43        limit += unconsumed_len;
44        let unconsumed = from_raw_parts(self.unconsumed, unconsumed_len);
45
46        // Rebuild a contiguous view so state handlers can scan across parse-call
47        // boundaries.
48        aggregate = [unconsumed, input].concat();
49        &aggregate[..]
50      }
51    } else {
52      input
53    };
54
55    // Limit the data that is currently analyzed
56    data = &data[..limit];
57    let mut available = data.len();
58
59    #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
60    let mut last = Instant::now();
61
62    #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
63    let start = Instant::now();
64
65    #[cfg(any(debug_assertions, feature = "debug"))]
66    let mut previous_state = self.state;
67
68    #[cfg(any(debug_assertions, feature = "debug"))]
69    let previous_position = self.position;
70
71    // States will advance position manually, the parser has to explicitly
72    // track it
73    self.position = 0;
74    let mut advanced: usize;
75    let mut parsing = true;
76    let has_active_callbacks = self.active_callbacks != 0;
77    let has_header_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_NAME != 0;
78    let has_header_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_VALUE != 0;
79    let has_trailer_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_NAME != 0;
80    let has_trailer_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_VALUE != 0;
81
82    #[cfg(any(debug_assertions, feature = "debug"))]
83    if self.debug {
84      eprintln!("[milo_parser::debug] loop enter");
85    }
86
87    // Loop while there is data or a state has requested to continue without data
88    'parser: while parsing && (!self.paused) && (available != 0 || self.continue_without_data) {
89      #[cfg(any(debug_assertions, feature = "debug"))]
90      if self.debug {
91        eprintln!(
92          "[milo_parser::debug] loop before processing: previous_position={}, position={}, available={}, \
93           continue_without_data={}",
94          previous_position, self.position, available, self.continue_without_data
95        );
96      }
97
98      // Reset the flag before processing; states set it again when they need another
99      // zero-byte turn.
100      self.continue_without_data = false;
101      advanced = 0;
102
103      'state: {
104        match self.state {
105          // If the parser has finished and it receives more data, error
106          STATE_FINISH => {
107            fail!(UNEXPECTED_CHARACTER, "Unexpected data");
108          }
109
110          // The error state is a no-op
111          STATE_ERROR => {
112            suspend!();
113          }
114
115          // Choose the initial state depending on the configured message type.
116          STATE_START => {
117            if !self.autodetect && self.is_request {
118              if has_active_callbacks {
119                callback!(on_request);
120                callback!(on_message_start);
121              }
122              move_to!(request_line);
123            } else if !self.autodetect {
124              if has_active_callbacks {
125                callback!(on_response);
126                callback!(on_message_start);
127              }
128              move_to!(status_line);
129            } else if data.len() >= 5 && data[4] == b'/' && data.starts_with(b"HTTP") {
130              self.is_request = false;
131              if has_active_callbacks {
132                callback!(on_response);
133                callback!(on_message_start);
134              }
135              move_to!(status_line);
136            } else if data.len() >= 2 && data.starts_with(b"\r\n") {
137              // RFC 9112 section 2.2
138              advance!(2);
139            } else {
140              // For performance reasons, we assume it's a request so we don't look up the
141              // method twice
142              self.is_request = true;
143              if has_active_callbacks {
144                callback!(on_request);
145                callback!(on_message_start);
146              }
147              move_to!(request_line);
148            }
149          }
150
151          STATE_REQUEST_LINE => {
152            match find_cr(data, available) {
153              // RFC 9112 section 3
154              Some(cr) => {
155                match ensure_valid_line(data, cr, available) {
156                  MatchResult::Continue => {}
157                  MatchResult::Suspend => {
158                    suspend!();
159                  }
160                  MatchResult::Stop => {
161                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
162                  }
163                }
164
165                // RFC 9112 section 2.2 - Repeated
166                if cr == 0 {
167                  advance!(2);
168                  next!();
169                } else if cr < 14
170                // Length of "GET / HTTP/1.1"
171                {
172                  fail!(UNEXPECTED_CHARACTER, "Request line too short");
173                }
174
175                // The line is potentially valid, clear the parser
176                self.clear();
177
178                // RFC 9112 section 3.1
179                let method_start = 0;
180                let method_end = match find_char(data, method_start, cr, b' ') {
181                  Some(index) if index > method_start => index,
182                  _ => {
183                    fail!(UNEXPECTED_CHARACTER, "Expected space after method");
184                  }
185                };
186
187                // RFC 9112 section 3.2
188                let url_start = method_end + 1;
189                let url_end = match find_char(data, url_start, cr, b' ') {
190                  Some(index) if index > url_start => index,
191                  _ => {
192                    fail!(UNEXPECTED_CHARACTER, "Expected space after URL");
193                  }
194                };
195
196                // RFC 9112 section 2.3
197                let protocol_start = url_end + 1;
198                let protocol_end = match find_char(data, protocol_start, cr, b'/') {
199                  Some(index) if index > protocol_start => index,
200                  _ => {
201                    fail!(UNEXPECTED_CHARACTER, "Expected / after the protocol name");
202                  }
203                };
204
205                let method_slice = &data[method_start..method_end];
206                self.method = match method_slice.len() {
207                  3 => {
208                    match method_slice {
209                      b"GET" => METHOD_GET,
210                      b"PUT" => METHOD_PUT,
211                      b"PRI" => METHOD_PRI,
212                      _ => METHOD_OTHER,
213                    }
214                  }
215                  4 => {
216                    match method_slice {
217                      b"HEAD" => METHOD_HEAD,
218                      b"POST" => METHOD_POST,
219                      _ => METHOD_OTHER,
220                    }
221                  }
222                  5 => {
223                    match method_slice {
224                      b"PATCH" => METHOD_PATCH,
225                      b"TRACE" => METHOD_TRACE,
226                      _ => METHOD_OTHER,
227                    }
228                  }
229                  6 => {
230                    match method_slice {
231                      b"DELETE" => METHOD_DELETE,
232                      _ => METHOD_OTHER,
233                    }
234                  }
235                  7 => {
236                    match method_slice {
237                      b"CONNECT" => {
238                        self.is_connect = true;
239                        METHOD_CONNECT
240                      }
241                      b"OPTIONS" => METHOD_OPTIONS,
242                      _ => METHOD_OTHER,
243                    }
244                  }
245                  _ => METHOD_OTHER,
246                };
247
248                if self.method == METHOD_OTHER && !validate_token(data, method_start, method_end) {
249                  fail!(UNEXPECTED_CHARACTER, "Invalid method character");
250                }
251
252                if !validate_url(data, url_start, url_end) {
253                  fail!(UNEXPECTED_CHARACTER, "Invalid URL character");
254                }
255
256                let version_start = protocol_end + 1;
257                if cr != protocol_start + 8 {
258                  fail!(UNEXPECTED_CHARACTER, "Invalid protocol name");
259                }
260
261                if &data[protocol_start..cr] == b"HTTP/1.1" {
262                  if self.method == METHOD_PRI {
263                    fail!(UNSUPPORTED_HTTP_VERSION, "PRI is only valid with HTTP/2.0");
264                  }
265
266                  self.version_major = 1;
267                  self.version_minor = 1;
268                } else if &data[protocol_start..cr] == b"HTTP/2.0" {
269                  if self.method != METHOD_PRI {
270                    fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
271                  }
272
273                  self.version_major = 2;
274                  self.version_minor = 0;
275                } else {
276                  fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
277                }
278
279                if has_active_callbacks {
280                  callback!(on_method, method_start, method_end - method_start);
281                  callback!(on_url, url_start, url_end - url_start);
282                  callback!(on_protocol, protocol_start, protocol_end - protocol_start);
283                  callback!(on_version, version_start, 3);
284                }
285
286                advance!(cr + 2);
287
288                if self.method == METHOD_PRI {
289                  move_to!(http2_preface);
290                } else {
291                  move_to!(header);
292                }
293              }
294              None => {
295                if available >= self.max_start_line_length {
296                  fail!(UNEXPECTED_CHARACTER, "Request line too long");
297                } else {
298                  suspend!();
299                }
300              }
301            }
302          }
303
304          // RFC 9112 section 4
305          STATE_STATUS_LINE => {
306            match find_cr(data, available) {
307              Some(cr) => {
308                match ensure_valid_line(data, cr, available) {
309                  MatchResult::Continue => {}
310                  MatchResult::Suspend => {
311                    suspend!();
312                  }
313                  MatchResult::Stop => {
314                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
315                  }
316                }
317
318                // RFC 9112 section 2.2 - Repeated
319                if cr == 0 {
320                  advance!(2);
321                  next!();
322                } else if cr < 13
323                // Length of "HTTP/1.1 200 "
324                {
325                  fail!(UNEXPECTED_CHARACTER, "Status line too short");
326                }
327
328                // The line is potentially valid, clear the parser
329                self.clear();
330
331                let protocol_start = 0;
332                let protocol_end = 4;
333                let version_start = protocol_end + 1;
334                let version_end = protocol_start + 8;
335
336                if cr < version_end || data[version_end] != b' ' {
337                  fail!(UNEXPECTED_CHARACTER, "Expected space after protocol");
338                }
339
340                match &data[protocol_start..version_end] {
341                  b"HTTP/1.1" => {
342                    self.version_major = 1;
343                    self.version_minor = 1;
344                  }
345                  [b'H', b'T', b'T', b'P', b'/', ..] => {
346                    fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
347                  }
348                  _ => {
349                    fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
350                  }
351                }
352
353                let status_start = version_end + 1;
354                // Even if the reason is empty, there must be at least a space after the status
355                // code. So three digits plus a space
356                let status_end = version_end + 5;
357                if status_end > cr {
358                  fail!(INVALID_STATUS, "Expected HTTP response status");
359                }
360
361                if !is_digit(data[status_start])
362                  || !is_digit(data[status_start + 1])
363                  || !is_digit(data[status_start + 2])
364                {
365                  fail!(INVALID_STATUS, "Invalid HTTP response status");
366                }
367
368                if data[status_start + 3] != b' ' {
369                  fail!(INVALID_STATUS, "Expected a space after HTTP response status");
370                }
371
372                let reason_start = status_start + 4;
373                let reason_end = cr;
374                if reason_start != reason_end
375                  && unsafe { !validate_token_value(data.as_ptr().add(reason_start), reason_end - reason_start) }
376                {
377                  fail!(UNEXPECTED_CHARACTER, "Invalid status reason character");
378                }
379
380                self.status = ((data[status_start] - b'0') as u32) * 100
381                  + ((data[status_start + 1] - b'0') as u32) * 10
382                  + (data[status_start + 2] - b'0') as u32;
383
384                if has_active_callbacks {
385                  callback!(on_protocol, protocol_start, 4);
386                  callback!(on_version, version_start, 3);
387                  callback!(on_status, status_start, 3);
388                  if reason_end > reason_start {
389                    callback!(on_reason, reason_start, reason_end - reason_start);
390                  }
391                }
392
393                advance!(cr + 2);
394                move_to!(header);
395              }
396              None => {
397                if available >= self.max_start_line_length {
398                  fail!(UNEXPECTED_CHARACTER, "Status line too long");
399                } else {
400                  suspend!();
401                }
402              }
403            }
404          }
405
406          STATE_HTTP2_PREFACE => {
407            if available < 8 {
408              suspend!();
409            } else if &data[..8] == b"\r\nSM\r\n\r\n" {
410              advance!(8);
411              move_to!(tunnel);
412            } else {
413              fail!(UNEXPECTED_CHARACTER, "Malformed HTTP/2.0 preface");
414            }
415          }
416
417          STATE_HEADER => {
418            match find_header_line_end(data.as_ptr(), available) {
419              HeaderLineScanResult::Cr(cr) => {
420                match ensure_valid_line(data, cr, available) {
421                  MatchResult::Continue => {}
422                  MatchResult::Suspend => {
423                    suspend!();
424                  }
425                  MatchResult::Stop => {
426                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
427                  }
428                }
429
430                // No more headers or no headers at all, move to the body decision state
431                if cr == 0 {
432                  self.continue_without_data = true;
433                  advance!(2);
434                  move_to!(body_decision);
435                  next!();
436                }
437
438                // RFC 9112 section.4
439                // RFC 9110 section 5.5 and 5.6
440                let header_name_start = 0;
441                let header_name_end = match find_char(data, header_name_start, cr, b':') {
442                  Some(index) if index > header_name_start => index,
443                  _ => {
444                    fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
445                  }
446                };
447                let mut header_value_start = header_name_end + 1;
448                let mut header_value_end = cr;
449
450                let status = self.status;
451                let first_header_byte = data[header_name_start];
452                if !matches!(first_header_byte, b'c' | b'C' | b't' | b'T' | b'u' | b'U') {
453                  if !validate_token(data, header_name_start, header_name_end) {
454                    fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
455                  }
456
457                  if has_header_value_callback {
458                    strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
459                  }
460                } else {
461                  let header_name_len = header_name_end - header_name_start;
462                  match (header_name_len, &data[header_name_start..header_name_end]) {
463                    // RFC 9112 section 6.2
464                    (14, case_insensitive_string!("content-length")) => {
465                      if self.has_transfer_encoding {
466                        fail!(
467                          UNEXPECTED_CONTENT_LENGTH,
468                          "Unexpected Content-Length header when Transfer-Encoding header is present"
469                        );
470                      } else if status == 205 || status == 204 || status / 100 == 1 {
471                        fail!(
472                          UNEXPECTED_CONTENT_LENGTH,
473                          "Unexpected Content-Length header for a response without body"
474                        );
475                      } else if self.has_content_length {
476                        fail!(INVALID_CONTENT_LENGTH, "Invalid duplicate Content-Length header");
477                      }
478
479                      if header_value_start < cr && !is_ws(data[cr - 1]) {
480                        let value_start = if data[header_value_start] == b' ' {
481                          header_value_start + 1
482                        } else {
483                          header_value_start
484                        };
485
486                        if value_start < cr && !is_ws(data[value_start]) {
487                          header_value_start = value_start;
488                        } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
489                          fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
490                        }
491                      } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
492                        fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
493                      }
494
495                      let mut i = header_value_start;
496                      let mut content_length = 0u64;
497
498                      if header_value_end - header_value_start > 19 {
499                        // Milo caps Content-Length at 19 digits as a practical limit. This keeps
500                        // parsing overflow-safe while allowing values far
501                        // beyond realistic message sizes.
502                        fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
503                      }
504
505                      while i < header_value_end {
506                        let current = data[i];
507                        if !is_digit(current) {
508                          fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
509                        }
510
511                        content_length = content_length * 10 + (current - b'0') as u64;
512                        i += 1;
513                      }
514
515                      self.has_content_length = true;
516                      self.content_length = content_length;
517                      self.remaining_content_length = content_length;
518                    }
519                    // RFC 9112 section 6.1
520                    (17, case_insensitive_string!("transfer-encoding")) => {
521                      if self.has_content_length {
522                        fail!(
523                          UNEXPECTED_TRANSFER_ENCODING,
524                          "Unexpected Transfer-Encoding header when Content-Length header is present"
525                        );
526                      } else if status == 304 || status == 205 || status == 204 || status / 100 == 1 {
527                        fail!(
528                          UNEXPECTED_TRANSFER_ENCODING,
529                          "Unexpected Transfer-Encoding header for a response without body"
530                        );
531                      }
532
533                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
534                        fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
535                      }
536
537                      self.has_transfer_encoding = true;
538
539                      if &data[header_value_start..header_value_end] == b"chunked" {
540                        // If this is true, it means the Transfer-Encoding header was specified more
541                        // than once. This is the second repetition and therefore, the previous one is
542                        // no longer the last one, making it invalid.
543                        if self.has_chunked_transfer_encoding {
544                          fail!(
545                            INVALID_TRANSFER_ENCODING,
546                            "The value \"chunked\" in the Transfer-Encoding header must be the last provided and can \
547                             be provided only once"
548                          );
549                        }
550
551                        self.has_chunked_transfer_encoding = true;
552                      } else {
553                        let mut token_start = header_value_start;
554                        loop {
555                          while token_start < header_value_end && is_ws(data[token_start]) {
556                            token_start += 1;
557                          }
558
559                          if token_start == header_value_end {
560                            break;
561                          }
562
563                          let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
564                            Some(comma) => comma,
565                            None => header_value_end,
566                          };
567                          let mut token_end = token_end_raw;
568
569                          if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
570                            fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
571                          }
572
573                          self.has_transfer_encoding = true;
574
575                          if let case_insensitive_string!("chunked") = data[token_start..token_end] {
576                            // If this is true, it means the Transfer-Encoding header was specified more
577                            // than once. This is the second repetition and therefore, the previous one is
578                            // no longer the last one, making it invalid.
579                            if self.has_chunked_transfer_encoding {
580                              fail!(
581                                INVALID_TRANSFER_ENCODING,
582                                "The value \"chunked\" in the Transfer-Encoding header must be the last provided and \
583                                 can be provided only once"
584                              );
585                            }
586
587                            self.has_chunked_transfer_encoding = true;
588                          } else {
589                            if self.has_chunked_transfer_encoding {
590                              // Any other value when chunked was already specified is invalid as the previous
591                              // chunked would not be the last one anymore
592                              fail!(
593                                INVALID_TRANSFER_ENCODING,
594                                "The value \"chunked\" in the Transfer-Encoding header must be the last provided"
595                              );
596                            }
597                          }
598
599                          if token_end_raw == header_value_end {
600                            break;
601                          } else {
602                            token_start = token_end_raw + 1;
603                          }
604                        }
605                      }
606                    }
607                    // RFC 9112 section 9.6
608                    (10, case_insensitive_string!("connection")) => {
609                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
610                        fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
611                      }
612
613                      match data[header_value_start..header_value_end] {
614                        case_insensitive_string!("close") => {
615                          self.has_connection_close = true;
616                        }
617                        case_insensitive_string!("keep-alive") => {
618                          // Keep-alive is implicit unless Connection: close is
619                          // present.
620                        }
621                        case_insensitive_string!("upgrade") => {
622                          self.has_connection_upgrade = true;
623                        }
624                        _ => {
625                          // Comma separated values
626                          let mut token_start = header_value_start;
627                          loop {
628                            while token_start < header_value_end && is_ws(data[token_start]) {
629                              token_start += 1;
630                            }
631
632                            if token_start == header_value_end {
633                              break;
634                            }
635
636                            let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
637                              Some(comma) => comma,
638                              None => header_value_end,
639                            };
640                            let mut token_end = token_end_raw;
641
642                            if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
643                              fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
644                            }
645
646                            match data[token_start..token_end] {
647                              case_insensitive_string!("close") => {
648                                self.has_connection_close = true;
649                              }
650                              case_insensitive_string!("upgrade") => {
651                                self.has_connection_upgrade = true;
652                              }
653                              case_insensitive_string!("keep-alive") => {}
654                              _ => {
655                                if !validate_token(data, token_start, token_end) {
656                                  fail!(UNEXPECTED_CHARACTER, "Invalid Connection header value");
657                                }
658                              }
659                            }
660
661                            if token_end_raw == header_value_end {
662                              break;
663                            } else {
664                              token_start = token_end_raw + 1;
665                            }
666                          }
667                        }
668                      }
669                    }
670                    (7, case_insensitive_string!("trailer")) => {
671                      self.has_trailers = true;
672
673                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
674                        fail!(UNEXPECTED_CHARACTER, "Expected Trailer header value");
675                      }
676                    }
677                    (7, case_insensitive_string!("upgrade")) => {
678                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
679                        fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
680                      }
681
682                      let mut token_start = header_value_start;
683                      loop {
684                        while token_start < header_value_end && is_ws(data[token_start]) {
685                          token_start += 1;
686                        }
687
688                        if token_start == header_value_end {
689                          break;
690                        }
691
692                        let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
693                          Some(comma) => comma,
694                          None => header_value_end,
695                        };
696                        let mut token_end = token_end_raw;
697
698                        if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
699                          fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
700                        }
701
702                        let protocol_name_end = find_char(data, token_start, token_end, b'/').unwrap_or(token_end);
703                        if !validate_token(data, token_start, protocol_name_end) {
704                          fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
705                        }
706
707                        if protocol_name_end < token_end {
708                          let protocol_version_start = protocol_name_end + 1;
709                          if find_char(data, protocol_version_start, token_end, b'/').is_some()
710                            || !validate_token(data, protocol_version_start, token_end)
711                          {
712                            fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
713                          }
714                        }
715
716                        if token_end_raw == header_value_end {
717                          break;
718                        } else {
719                          token_start = token_end_raw + 1;
720                        }
721                      }
722
723                      self.has_upgrade = true;
724                    }
725                    _ => {
726                      if !validate_token(data, header_name_start, header_name_end) {
727                        fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
728                      }
729
730                      if has_header_value_callback {
731                        strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
732                      }
733                    }
734                  }
735                }
736
737                if has_header_name_callback {
738                  callback!(on_header_name, header_name_start, header_name_end - header_name_start);
739                }
740
741                if has_header_value_callback {
742                  callback!(
743                    on_header_value,
744                    header_value_start,
745                    header_value_end - header_value_start
746                  );
747                }
748
749                advance!(cr + 2);
750              }
751              HeaderLineScanResult::Invalid(invalid) => {
752                match find_char(data, 0, invalid, b':') {
753                  Some(_) => {
754                    fail!(UNEXPECTED_CHARACTER, "Invalid header field value character");
755                  }
756                  None => {
757                    fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
758                  }
759                }
760              }
761              HeaderLineScanResult::Incomplete => {
762                if available >= self.max_header_length {
763                  fail!(UNEXPECTED_CHARACTER, "Header line too long");
764                } else {
765                  suspend!();
766                }
767              }
768            }
769          }
770
771          // RFC 9110 section 9.3.6 and 7.8 - Headers have finished, check if the
772          // connection must be upgraded or a body is expected.
773          STATE_BODY_DECISION => {
774            if has_active_callbacks {
775              callback!(on_headers);
776            }
777
778            let method = self.method;
779            let status = self.status;
780
781            if self.has_upgrade && !self.has_connection_upgrade {
782              fail!(
783                MISSING_CONNECTION_UPGRADE,
784                "Missing Connection header set to \"upgrade\" when using the Upgrade header"
785              );
786            }
787
788            if self.has_trailers && !self.has_chunked_transfer_encoding {
789              fail!(
790                UNEXPECTED_TRAILERS,
791                "Trailers are not allowed when not using chunked transfer encoding"
792              );
793            } else if self.is_request && (method == METHOD_GET || method == METHOD_HEAD) && self.content_length > 0 {
794              fail!(UNEXPECTED_CONTENT, "Unexpected content for the request (GET or HEAD)");
795            }
796
797            // In case of Connection: Upgrade or a CONNECT method
798            if self.is_connect {
799              // In case of CONNECT method
800              callback!(on_connect);
801              move_to!(tunnel);
802            } else if self.has_upgrade && !self.is_request && status == 101 {
803              callback!(on_upgrade);
804              move_to!(tunnel);
805            } else if self.is_request {
806              if self.has_transfer_encoding && !self.has_chunked_transfer_encoding {
807                fail!(
808                  UNEXPECTED_CONTENT_LENGTH,
809                  "Transfer-Encoding last header value must be \"chunked\" if the header is present"
810                );
811              } else if self.skip_body {
812                self.continue_without_data = true;
813                self.complete(0);
814              } else if self.has_content_length {
815                // RFC 9110 section 6.3
816                if self.content_length == 0 {
817                  self.continue_without_data = true;
818                  self.complete(0);
819                } else {
820                  move_to!(body_via_content_length);
821                }
822              } else if !self.has_chunked_transfer_encoding {
823                self.continue_without_data = true;
824                self.complete(0);
825              } else {
826                move_to!(chunk_header);
827              }
828            } else {
829              // Response
830              // RFC 9110 section 15.4.5
831              if self.skip_body || (status < 200 && status != 101) || status == 204 || status == 205 || status == 304 {
832                self.continue_without_data = true;
833                self.complete(0);
834              } else if self.has_content_length {
835                if self.content_length == 0 {
836                  self.continue_without_data = true;
837                  self.complete(0);
838                } else {
839                  move_to!(body_via_content_length);
840                }
841              } else if self.has_chunked_transfer_encoding {
842                move_to!(chunk_header);
843              } else {
844                move_to!(body_with_no_length);
845              }
846            }
847          }
848
849          // RFC 9112 section 6.2
850          STATE_BODY_VIA_CONTENT_LENGTH => {
851            let expected = self.remaining_content_length;
852            let available_64 = available as u64;
853
854            // Less data than what it is expected
855            if available_64 < expected {
856              self.remaining_content_length -= available_64;
857
858              callback!(on_data, 0, available);
859              advance!(available);
860            } else {
861              self.remaining_content_length = 0;
862
863              callback!(on_data, 0, expected as usize);
864              callback!(on_body, expected as usize, 0);
865
866              self.continue_without_data = true;
867
868              advance!(expected as usize);
869              self.complete(expected as usize);
870            }
871          }
872
873          // RFC 9110 section 6.3 - Body with no length nor chunked encoding.
874          // This is only allowed in responses.
875          // Note that on_body can't and will not be called here as there is no way to
876          // know when the response finishes.
877          STATE_BODY_WITH_NO_LENGTH => {
878            callback!(on_data, 0, available);
879            advance!(available);
880          }
881
882          // RFC 9112 section 7.1
883          STATE_CHUNK_HEADER => {
884            match find_cr(data, available) {
885              Some(cr) => {
886                match ensure_valid_line(data, cr, available) {
887                  MatchResult::Continue => {}
888                  MatchResult::Suspend => {
889                    suspend!();
890                  }
891                  MatchResult::Stop => {
892                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
893                  }
894                }
895
896                let chunk_length_start = 0;
897                // Note, the character is optional since chunk extensions are not required
898                let chunk_length_end = match find_char(data, chunk_length_start, cr, b';') {
899                  Some(index) => index,
900                  None => cr,
901                };
902
903                if chunk_length_end == 0 {
904                  fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
905                } else if chunk_length_end - chunk_length_start > 16 {
906                  fail!(INVALID_CHUNK_SIZE, "Invalid chunk length size");
907                }
908
909                let mut i = chunk_length_start;
910                let mut chunk_length = 0u64;
911                while i < chunk_length_end {
912                  let b = data[i];
913
914                  let hex = if b.is_ascii_digit() {
915                    (b - b'0') as u64
916                  } else if (b'a'..=b'f').contains(&b) {
917                    (b - b'a' + 10) as u64
918                  } else if (b'A'..=b'F').contains(&b) {
919                    (b - b'A' + 10) as u64
920                  } else {
921                    fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
922                  };
923
924                  chunk_length = chunk_length * 16 + hex;
925                  i += 1;
926                }
927
928                self.chunk_size = chunk_length;
929                self.remaining_chunk_size = chunk_length;
930
931                callback!(
932                  on_chunk_length,
933                  chunk_length_start,
934                  chunk_length_end - chunk_length_start
935                );
936
937                // There are extensions
938                if chunk_length_end < cr {
939                  advance!(chunk_length_end + 1);
940                  move_to!(chunk_extensions);
941                } else {
942                  self.continue_without_data = true;
943                  advance!(cr + 2);
944
945                  if self.chunk_size == 0 {
946                    callback!(on_chunk, 3, 0);
947                    callback!(on_body, 3, 0);
948                    move_to!(trailer);
949                  } else {
950                    move_to!(chunk_data);
951                  }
952                }
953              }
954              None => {
955                if available >= self.max_header_length {
956                  fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
957                } else {
958                  suspend!();
959                }
960              }
961            }
962          }
963
964          STATE_CHUNK_EXTENSIONS => {
965            match find_cr(data, available) {
966              Some(cr) => {
967                match ensure_valid_line(data, cr, available) {
968                  MatchResult::Continue => {}
969                  MatchResult::Suspend => {
970                    suspend!();
971                  }
972                  MatchResult::Stop => {
973                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
974                  }
975                }
976
977                let mut name_start = 0;
978                // Find the first between = or ;
979                let name_end_raw = find_char2(data, name_start, cr, b'=', b';').unwrap_or(cr);
980                let mut name_end = name_end_raw;
981
982                if !strip_ows(data, &mut name_start, &mut name_end, false) {
983                  fail!(UNEXPECTED_CHARACTER, "Expected chunk extension name");
984                }
985
986                if !validate_token(data, name_start, name_end) {
987                  fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension name character");
988                }
989
990                // No value
991                if name_end == cr || data[name_end_raw] == b';' {
992                  callback!(on_chunk_extension_name, name_start, name_end - name_start);
993
994                  if name_end_raw == cr {
995                    advance!(cr + 2);
996
997                    if self.chunk_size == 0 {
998                      callback!(on_body);
999                      move_to!(trailer);
1000                    } else {
1001                      move_to!(chunk_data);
1002                    }
1003                  } else {
1004                    advance!(name_end_raw + 1);
1005                    move_to!(chunk_extensions);
1006                  }
1007                } else {
1008                  // Get the value
1009                  let mut value_start = name_end_raw + 1;
1010                  let mut value_end: usize;
1011                  let next_extension: usize;
1012
1013                  // Strip OWS before the value
1014                  while value_start < cr && is_ws(data[value_start]) {
1015                    value_start += 1;
1016                  }
1017
1018                  if value_start == cr {
1019                    fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1020                  }
1021
1022                  // Quoted string
1023                  // RFC 9110 section 5.6.4
1024                  let mut quoted = false;
1025                  let quote_start = value_start;
1026                  if data[value_start] == b'"' {
1027                    quoted = true;
1028                    value_start += 1;
1029                    let mut quote_start = value_start;
1030
1031                    loop {
1032                      match find_char(data, quote_start, cr, b'"') {
1033                        Some(index) => {
1034                          // Count consecutive backslashes immediately before the quote
1035                          let mut backslash_count = 0usize;
1036                          let mut i = index;
1037
1038                          while i > quote_start && data[i - 1] == b'\\' {
1039                            backslash_count += 1;
1040                            i -= 1;
1041                          }
1042
1043                          if backslash_count.is_multiple_of(2) {
1044                            // quote is not escaped
1045                            value_end = index;
1046                            break;
1047                          } else {
1048                            // quote is escaped, continue searching after it
1049                            quote_start = index + 1;
1050                          }
1051                        }
1052                        None => {
1053                          fail!(UNEXPECTED_CHARACTER, "Expected closing quote for chunk extension value");
1054                        }
1055                      };
1056                    }
1057
1058                    if !validate_quoted_string(data, value_start, value_end) {
1059                      fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension quoted value character");
1060                    }
1061
1062                    next_extension = value_end + 1;
1063                  } else {
1064                    value_end = find_char(data, value_start, cr, b';').unwrap_or(cr);
1065                    next_extension = if value_end == cr { cr } else { value_end };
1066
1067                    if !strip_ows(data, &mut value_start, &mut value_end, false) {
1068                      fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1069                    }
1070
1071                    if value_start != value_end && !validate_token(data, value_start, value_end) {
1072                      fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension value character");
1073                    }
1074                  }
1075
1076                  callback!(on_chunk_extension_name, name_start, name_end - name_start);
1077
1078                  if quoted {
1079                    callback!(on_chunk_extension_value, quote_start, value_end - quote_start + 1);
1080                  } else {
1081                    callback!(on_chunk_extension_value, value_start, value_end - value_start);
1082                  }
1083
1084                  let next_semicolon = find_char(data, next_extension, cr, b';').unwrap_or(cr);
1085
1086                  let mut i = next_extension;
1087                  while i < next_semicolon {
1088                    if !is_ws(data[i]) {
1089                      fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension character after value");
1090                    }
1091                    i += 1;
1092                  }
1093
1094                  if next_semicolon < cr {
1095                    advance!(next_semicolon + 1);
1096                  } else {
1097                    advance!(cr + 2);
1098
1099                    if self.chunk_size == 0 {
1100                      callback!(on_body);
1101                      move_to!(trailer);
1102                    } else {
1103                      move_to!(chunk_data);
1104                    }
1105                  }
1106                }
1107              }
1108              None => {
1109                // Given in chunk_header we already validated this, this should not happen.
1110                if available >= self.max_header_length {
1111                  fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
1112                } else {
1113                  suspend!();
1114                }
1115              }
1116            }
1117          }
1118
1119          STATE_CHUNK_DATA => {
1120            let expected = self.remaining_chunk_size;
1121            let available_64 = available as u64;
1122
1123            // No more data for this chunk, just wait for the CRLF
1124            if expected == 0 {
1125              if available < 2 {
1126                suspend!();
1127              } else if data[0] != b'\r' || data[1] != b'\n' {
1128                fail!(UNEXPECTED_CHARACTER, "Expected CRLF after chunk data");
1129              } else {
1130                advance!(2);
1131                move_to!(chunk_header);
1132              }
1133            } else if available_64 < expected {
1134              // Less data than what it is expected for this chunk
1135              self.remaining_chunk_size -= available_64;
1136
1137              callback!(on_chunk, 0, available);
1138              callback!(on_data, 0, available);
1139
1140              advance!(available);
1141            } else {
1142              self.remaining_chunk_size = 0;
1143
1144              callback!(on_chunk, 0, expected as usize);
1145              callback!(on_data, 0, expected as usize);
1146
1147              advance!(expected as usize);
1148            }
1149          }
1150
1151          // RFC 9112 section 7.1.2
1152          STATE_TRAILER => {
1153            match find_header_line_end(data.as_ptr(), available) {
1154              HeaderLineScanResult::Cr(cr) => {
1155                match ensure_valid_line(data, cr, available) {
1156                  MatchResult::Continue => {}
1157                  MatchResult::Suspend => {
1158                    suspend!();
1159                  }
1160                  MatchResult::Stop => {
1161                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
1162                  }
1163                }
1164
1165                // No more trailers or no trailers at all, message completed
1166                if cr == 0 {
1167                  callback!(on_trailers, 2, 0);
1168                  self.continue_without_data = true;
1169                  advance!(2);
1170                  self.complete(2);
1171                  next!();
1172                }
1173
1174                let trailer_name_start = 0;
1175                let trailer_name_end = match find_char(data, trailer_name_start, cr, b':') {
1176                  Some(index) if index > trailer_name_start => index,
1177                  _ => {
1178                    fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1179                  }
1180                };
1181
1182                let mut trailer_value_start = trailer_name_end + 1;
1183                let mut trailer_value_end = cr;
1184                if has_trailer_value_callback {
1185                  strip_ows_fast(data, &mut trailer_value_start, &mut trailer_value_end, true);
1186                }
1187
1188                // Validate
1189                if !validate_token(data, trailer_name_start, trailer_name_end) {
1190                  fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1191                }
1192
1193                if has_trailer_name_callback {
1194                  callback!(
1195                    on_trailer_name,
1196                    trailer_name_start,
1197                    trailer_name_end - trailer_name_start
1198                  );
1199                }
1200
1201                if has_trailer_value_callback {
1202                  callback!(
1203                    on_trailer_value,
1204                    trailer_value_start,
1205                    trailer_value_end - trailer_value_start
1206                  );
1207                }
1208                advance!(cr + 2);
1209              }
1210              HeaderLineScanResult::Invalid(invalid) => {
1211                match find_char(data, 0, invalid, b':') {
1212                  Some(_) => {
1213                    fail!(UNEXPECTED_CHARACTER, "Invalid trailer field value character");
1214                  }
1215                  None => {
1216                    fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1217                  }
1218                }
1219              }
1220              HeaderLineScanResult::Incomplete => {
1221                if available >= self.max_header_length {
1222                  fail!(UNEXPECTED_CHARACTER, "Trailer line too long");
1223                } else {
1224                  suspend!();
1225                }
1226              }
1227            }
1228          }
1229
1230          // Return PAUSE makes this method idempotent without failing - In this state
1231          // all data is ignored since the connection is not in HTTP anymore
1232          STATE_TUNNEL => {
1233            suspend!();
1234          }
1235
1236          _ => {
1237            fail!(UNEXPECTED_STATE, "Invalid state");
1238          }
1239        }
1240      }
1241
1242      // Update the parser position
1243      if advanced > 0 {
1244        self.position += advanced;
1245        data = &data[advanced..];
1246        available -= advanced;
1247
1248        #[cfg(any(debug_assertions, feature = "debug"))]
1249        if self.debug {
1250          eprintln!(
1251            "[milo_parser::debug] loop before processing: position={}, advanced={}, available={}, \
1252             continue_without_data={}",
1253            self.position, advanced, available, self.continue_without_data
1254          );
1255        }
1256      }
1257
1258      // Notify the status change
1259      #[cfg(any(debug_assertions, feature = "debug"))]
1260      if previous_state != self.state {
1261        callback!(on_state_change);
1262        previous_state = self.state;
1263      }
1264
1265      // Show the duration of the operation
1266      #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1267      if self.debug {
1268        let duration = Instant::now().duration_since(last).as_nanos();
1269
1270        if duration > 0 {
1271          eprintln!(
1272            "[milo_parser::debug] loop iteration ({:?}) completed in {} ns",
1273            self.state_str(),
1274            duration
1275          );
1276        }
1277
1278        last = Instant::now();
1279      }
1280    }
1281
1282    #[cfg(any(debug_assertions, feature = "debug"))]
1283    if self.debug {
1284      eprintln!("[milo_parser::debug] loop exit");
1285    }
1286
1287    let consumed = self.position;
1288    self.parsed += consumed as u64;
1289
1290    if self.manage_unconsumed {
1291      unsafe {
1292        // Drop any previous retained data
1293        if unconsumed_len > 0 {
1294          let _ = from_raw_parts(self.unconsumed, unconsumed_len);
1295        }
1296
1297        // If less bytes were consumed than requested, copy the unconsumed portion in
1298        // the self.for the next iteration
1299        if consumed < limit {
1300          let (ptr, len, _) = data.to_vec().into_raw_parts();
1301
1302          self.unconsumed = ptr;
1303          self.unconsumed_len = len;
1304        } else {
1305          self.unconsumed = ptr::null();
1306          self.unconsumed_len = 0;
1307        }
1308      }
1309    }
1310
1311    #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1312    if self.debug {
1313      let duration = Instant::now().duration_since(start).as_nanos();
1314
1315      if duration > 0 {
1316        eprintln!(
1317          "[milo_parser::debug] parse ({:?}, consumed {} of {}) completed in {} ns",
1318          self.state_str(),
1319          consumed,
1320          limit,
1321          duration
1322        );
1323      }
1324    }
1325
1326    // Return the number of consumed bytes
1327    consumed
1328  }
1329
1330  // RFC 9110 section 6.4.1 - Message completed
1331  #[inline(always)]
1332  fn complete(&mut self, offset: usize) {
1333    if self.active_callbacks != 0 {
1334      callback!(on_message_complete, offset, 0);
1335      callback!(on_reset, offset, 0);
1336    }
1337
1338    self.continue_without_data = false;
1339    self.skip_body = false;
1340
1341    if self.has_upgrade && self.is_request {
1342      move_to!(tunnel);
1343    } else if self.has_connection_close {
1344      if self.active_callbacks != 0 {
1345        callback!(on_finish);
1346      }
1347      move_to!(finish);
1348    } else {
1349      move_to!(start);
1350    }
1351  }
1352}