Skip to main content

milo_parser/
parse.rs

1#![allow(clippy::not_unsafe_ptr_arg_deref)]
2
3extern crate alloc;
4
5use alloc::vec::Vec;
6use alloc::{boxed::Box, format};
7use core::cell::{Cell, RefCell};
8use core::ffi::{c_char, c_uchar, c_void};
9use core::fmt::Debug;
10use core::ptr;
11use core::str;
12use core::{slice, slice::from_raw_parts};
13use std::string;
14#[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
15use std::time::Instant;
16
17use milo_macros::*;
18
19use crate::Methods::CONNECT;
20use crate::matchers::*;
21use crate::*;
22
23impl Parser {
24  /// Parses a slice of characters.
25  ///
26  /// It returns the number of consumed characters.
27  pub fn parse(&mut self, input: *const c_uchar, limit: usize) -> usize {
28    // If the parser is paused, this is a no-op
29    if self.paused {
30      return 0;
31    }
32
33    let input = unsafe { from_raw_parts(input, limit) };
34
35    // Set the data to analyze, prepending unconsumed data from previous iteration
36    // if needed
37    let mut limit = limit;
38    let aggregate: Vec<c_uchar>;
39    let unconsumed_len = self.unconsumed_len;
40
41    let mut data = if self.manage_unconsumed && unconsumed_len > 0 {
42      unsafe {
43        limit += unconsumed_len;
44        let unconsumed = from_raw_parts(self.unconsumed, unconsumed_len);
45
46        aggregate = [unconsumed, input].concat();
47        &aggregate[..]
48      }
49    } else {
50      input
51    };
52
53    // Limit the data that is currently analyzed
54    data = &data[..limit];
55    let mut available = data.len();
56
57    #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
58    let mut last = Instant::now();
59
60    #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
61    let start = Instant::now();
62
63    #[cfg(any(debug_assertions, feature = "debug"))]
64    let mut previous_state = self.state;
65
66    #[cfg(any(debug_assertions, feature = "debug"))]
67    let previous_position = self.position;
68
69    // States will advance position manually, the parser has to explicitly
70    // track it
71    self.position = 0;
72    let mut advanced: usize;
73    let mut parsing = true;
74    let has_active_callbacks = self.active_callbacks != 0;
75    let has_header_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_NAME != 0;
76    let has_header_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_VALUE != 0;
77    let has_trailer_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_NAME != 0;
78    let has_trailer_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_VALUE != 0;
79
80    #[cfg(any(debug_assertions, feature = "debug"))]
81    if self.debug {
82      eprintln!("[milo_parser::debug] loop enter");
83    }
84
85    // Loop while there is data or there is a request to continue
86    'parser: while parsing && (!self.paused) && (available != 0 || self.continue_without_data) {
87      #[cfg(any(debug_assertions, feature = "debug"))]
88      if self.debug {
89        eprintln!(
90          "[milo_parser::debug] loop before processing: previous_position={}, position={}, available={}, \
91           continue_without_data={}",
92          previous_position, self.position, available, self.continue_without_data
93        );
94      }
95
96      // Reset the continue_without_data flag
97      self.continue_without_data = false;
98      advanced = 0;
99
100      'state: {
101        match self.state {
102          // If the parser has finished and it receives more data, error
103          STATE_FINISH => {
104            fail!(UNEXPECTED_CHARACTER, "Unexpected data");
105          }
106
107          // The error state is a no-op
108          STATE_ERROR => {
109            suspend!();
110          }
111
112          // Choose the initial state depending on the configured message type.
113          STATE_START => {
114            if !self.autodetect && self.is_request {
115              if has_active_callbacks {
116                callback!(on_request);
117                callback!(on_message_start);
118              }
119              move_to!(request_line);
120            } else if !self.autodetect {
121              if has_active_callbacks {
122                callback!(on_response);
123                callback!(on_message_start);
124              }
125              move_to!(status_line);
126            } else if data.len() >= 5 && data[4] == b'/' && data.starts_with(b"HTTP") {
127              self.is_request = false;
128              if has_active_callbacks {
129                callback!(on_response);
130                callback!(on_message_start);
131              }
132              move_to!(status_line);
133            } else if data.len() >= 2 && data.starts_with(b"\r\n") {
134              // RFC 9112 section 2.2
135              advance!(2);
136            } else {
137              // For performance reason, we assume it's a request so we don't lookup the
138              // method twice
139              self.is_request = true;
140              if has_active_callbacks {
141                callback!(on_request);
142                callback!(on_message_start);
143              }
144              move_to!(request_line);
145            }
146          }
147
148          STATE_REQUEST_LINE => {
149            match find_cr(data, available) {
150              // RFC 9112 section 3
151              Some(cr) => {
152                match ensure_valid_line(data, cr, available) {
153                  MatchResult::Continue => {}
154                  MatchResult::Suspend => {
155                    suspend!();
156                  }
157                  MatchResult::Stop => {
158                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
159                  }
160                }
161
162                // RFC 9112 section 2.2 - Repeated
163                if cr == 0 {
164                  advance!(2);
165                  next!();
166                } else if cr < 14
167                // Length of "GET / HTTP/1.1"
168                {
169                  fail!(UNEXPECTED_CHARACTER, "Request line too short");
170                }
171
172                // The line is potentially valid, clear the parser
173                self.clear();
174
175                // RFC 9112 section 3.1
176                let method_start = 0;
177                let method_end = match find_char(data, method_start, cr, b' ') {
178                  Some(index) if index > method_start => index,
179                  _ => {
180                    fail!(UNEXPECTED_CHARACTER, "Expected space after method");
181                  }
182                };
183
184                // RFC 9112 section 3.2
185                let url_start = method_end + 1;
186                let url_end = match find_char(data, url_start, cr, b' ') {
187                  Some(index) if index > url_start => index,
188                  _ => {
189                    fail!(UNEXPECTED_CHARACTER, "Expected space after URL");
190                  }
191                };
192
193                // RFC 9112 section 2.3
194                let protocol_start = url_end + 1;
195                let protocol_end = match find_char(data, protocol_start, cr, b'/') {
196                  Some(index) if index > protocol_start => index,
197                  _ => {
198                    fail!(UNEXPECTED_CHARACTER, "Expected / after the protocol name");
199                  }
200                };
201
202                let method_slice = &data[method_start..method_end];
203                self.method = match method_slice.len() {
204                  3 => {
205                    match method_slice {
206                      b"GET" => METHOD_GET,
207                      b"PUT" => METHOD_PUT,
208                      b"PRI" => METHOD_PRI,
209                      _ => METHOD_OTHER,
210                    }
211                  }
212                  4 => {
213                    match method_slice {
214                      b"HEAD" => METHOD_HEAD,
215                      b"POST" => METHOD_POST,
216                      _ => METHOD_OTHER,
217                    }
218                  }
219                  5 => {
220                    match method_slice {
221                      b"PATCH" => METHOD_PATCH,
222                      b"TRACE" => METHOD_TRACE,
223                      _ => METHOD_OTHER,
224                    }
225                  }
226                  6 => {
227                    match method_slice {
228                      b"DELETE" => METHOD_DELETE,
229                      _ => METHOD_OTHER,
230                    }
231                  }
232                  7 => {
233                    match method_slice {
234                      b"CONNECT" => {
235                        self.is_connect = true;
236                        METHOD_CONNECT
237                      }
238                      b"OPTIONS" => METHOD_OPTIONS,
239                      _ => METHOD_OTHER,
240                    }
241                  }
242                  _ => METHOD_OTHER,
243                };
244
245                if self.method == METHOD_OTHER && !validate_token(data, method_start, method_end) {
246                  fail!(UNEXPECTED_CHARACTER, "Invalid method character");
247                }
248
249                if !validate_url(data, url_start, url_end) {
250                  fail!(UNEXPECTED_CHARACTER, "Invalid URL character");
251                }
252
253                let version_start = protocol_end + 1;
254                if cr != protocol_start + 8 {
255                  fail!(UNEXPECTED_CHARACTER, "Invalid protocol name");
256                }
257
258                if &data[protocol_start..cr] == b"HTTP/1.1" {
259                  if self.method == METHOD_PRI {
260                    fail!(UNSUPPORTED_HTTP_VERSION, "PRI is only valid with HTTP/2.0");
261                  }
262
263                  self.version_major = 1;
264                  self.version_minor = 1;
265                } else if &data[protocol_start..cr] == b"HTTP/2.0" {
266                  if self.method != METHOD_PRI {
267                    fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
268                  }
269
270                  self.version_major = 2;
271                  self.version_minor = 0;
272                } else {
273                  fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
274                }
275
276                if has_active_callbacks {
277                  callback!(on_method, method_start, method_end - method_start);
278                  callback!(on_url, url_start, url_end - url_start);
279                  callback!(on_protocol, protocol_start, protocol_end - protocol_start);
280                  callback!(on_version, version_start, 3);
281                }
282
283                advance!(cr + 2);
284
285                if self.method == METHOD_PRI {
286                  move_to!(http2_preface);
287                } else {
288                  move_to!(header);
289                }
290              }
291              None => {
292                if available >= self.max_start_line_length {
293                  fail!(UNEXPECTED_CHARACTER, "Request line too long");
294                } else {
295                  suspend!();
296                }
297              }
298            }
299          }
300
301          // RFC 9112 section 4
302          STATE_STATUS_LINE => {
303            match find_cr(data, available) {
304              Some(cr) => {
305                match ensure_valid_line(data, cr, available) {
306                  MatchResult::Continue => {}
307                  MatchResult::Suspend => {
308                    suspend!();
309                  }
310                  MatchResult::Stop => {
311                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
312                  }
313                }
314
315                // RFC 9112 section 2.2 - Repeated
316                if cr == 0 {
317                  advance!(2);
318                  next!();
319                } else if cr < 13
320                // Length of "HTTP/1.1 200 "
321                {
322                  fail!(UNEXPECTED_CHARACTER, "Status line too short");
323                }
324
325                // The line is potentially valid, clear the parser
326                self.clear();
327
328                let protocol_start = 0;
329                let protocol_end = 4;
330                let version_start = protocol_end + 1;
331                let version_end = protocol_start + 8;
332
333                if cr < version_end || data[version_end] != b' ' {
334                  fail!(UNEXPECTED_CHARACTER, "Expected space after protocol");
335                }
336
337                match &data[protocol_start..version_end] {
338                  b"HTTP/1.1" => {
339                    self.version_major = 1;
340                    self.version_minor = 1;
341                  }
342                  [b'H', b'T', b'T', b'P', b'/', ..] => {
343                    fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
344                  }
345                  _ => {
346                    fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
347                  }
348                }
349
350                let status_start = version_end + 1;
351                // Even if the reason is empty, there must be at least a space after the status
352                // code. So three digits plus a space
353                let status_end = version_end + 5;
354                if status_end > cr {
355                  fail!(INVALID_STATUS, "Expected HTTP response status");
356                }
357
358                if !is_digit(data[status_start])
359                  || !is_digit(data[status_start + 1])
360                  || !is_digit(data[status_start + 2])
361                {
362                  fail!(INVALID_STATUS, "Invalid HTTP response status");
363                }
364
365                if data[status_start + 3] != b' ' {
366                  fail!(INVALID_STATUS, "Expected a space after HTTP response status");
367                }
368
369                let reason_start = status_start + 4;
370                let reason_end = cr;
371                if reason_start != reason_end
372                  && unsafe { !validate_token_value(data.as_ptr().add(reason_start), reason_end - reason_start) }
373                {
374                  fail!(UNEXPECTED_CHARACTER, "Invalid status reason character");
375                }
376
377                self.status = ((data[status_start] - b'0') as u32) * 100
378                  + ((data[status_start + 1] - b'0') as u32) * 10
379                  + (data[status_start + 2] - b'0') as u32;
380
381                if has_active_callbacks {
382                  callback!(on_protocol, protocol_start, 4);
383                  callback!(on_version, version_start, 3);
384                  callback!(on_status, status_start, 3);
385                  if reason_end > reason_start {
386                    callback!(on_reason, reason_start, reason_end - reason_start);
387                  }
388                }
389
390                advance!(cr + 2);
391                move_to!(header);
392              }
393              None => {
394                if available >= self.max_start_line_length {
395                  fail!(UNEXPECTED_CHARACTER, "Status line too long");
396                } else {
397                  suspend!();
398                }
399              }
400            }
401          }
402
403          STATE_HTTP2_PREFACE => {
404            if available < 8 {
405              suspend!();
406            } else if &data[..8] == b"\r\nSM\r\n\r\n" {
407              advance!(8);
408              move_to!(tunnel);
409            } else {
410              fail!(UNEXPECTED_CHARACTER, "Malformed HTTP/2.0 preface");
411            }
412          }
413
414          STATE_HEADER => {
415            match find_header_line_end(data.as_ptr(), available) {
416              HeaderLineScanResult::Cr(cr) => {
417                match ensure_valid_line(data, cr, available) {
418                  MatchResult::Continue => {}
419                  MatchResult::Suspend => {
420                    suspend!();
421                  }
422                  MatchResult::Stop => {
423                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
424                  }
425                }
426
427                // No more headers or no headers at all, move to the body decision state
428                if cr == 0 {
429                  self.continue_without_data = true;
430                  advance!(2);
431                  move_to!(body_decision);
432                  next!();
433                }
434
435                // RFC 9112 section.4
436                // RFC 9110 section 5.5 and 5.6
437                let header_name_start = 0;
438                let header_name_end = match find_char(data, header_name_start, cr, b':') {
439                  Some(index) if index > header_name_start => index,
440                  _ => {
441                    fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
442                  }
443                };
444                let mut header_value_start = header_name_end + 1;
445                let mut header_value_end = cr;
446
447                let status = self.status;
448                let first_header_byte = data[header_name_start];
449                if !matches!(first_header_byte, b'c' | b'C' | b't' | b'T' | b'u' | b'U') {
450                  if !validate_token(data, header_name_start, header_name_end) {
451                    fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
452                  }
453
454                  if has_header_value_callback {
455                    strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
456                  }
457                } else {
458                  let header_name_len = header_name_end - header_name_start;
459                  match (header_name_len, &data[header_name_start..header_name_end]) {
460                    // RFC 9112 section 6.2
461                    (14, case_insensitive_string!("content-length")) => {
462                      if self.has_transfer_encoding {
463                        fail!(
464                          UNEXPECTED_CONTENT_LENGTH,
465                          "Unexpected Content-Length header when Transfer-Encoding header is present"
466                        );
467                      } else if status == 205 || status == 204 || status / 100 == 1 {
468                        fail!(
469                          UNEXPECTED_CONTENT_LENGTH,
470                          "Unexpected Content-Length header for a response without body"
471                        );
472                      } else if self.has_content_length {
473                        fail!(INVALID_CONTENT_LENGTH, "Invalid duplicate Content-Length header");
474                      }
475
476                      if header_value_start < cr && !is_ws(data[cr - 1]) {
477                        let value_start = if data[header_value_start] == b' ' {
478                          header_value_start + 1
479                        } else {
480                          header_value_start
481                        };
482
483                        if value_start < cr && !is_ws(data[value_start]) {
484                          header_value_start = value_start;
485                        } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
486                          fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
487                        }
488                      } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
489                        fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
490                      }
491
492                      let mut i = header_value_start;
493                      let mut content_length = 0u64;
494
495                      if header_value_end - header_value_start > 19 {
496                        // Milo caps Content-Length at 19 digits as a practical limit. This keeps
497                        // parsing overflow-safe while allowing values far
498                        // beyond realistic message sizes.
499                        fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
500                      }
501
502                      while i < header_value_end {
503                        let current = data[i];
504                        if !is_digit(current) {
505                          fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
506                        }
507
508                        content_length = content_length * 10 + (current - b'0') as u64;
509                        i += 1;
510                      }
511
512                      self.has_content_length = true;
513                      self.content_length = content_length;
514                      self.remaining_content_length = content_length;
515                    }
516                    // RFC 9112 section 6.1
517                    (17, case_insensitive_string!("transfer-encoding")) => {
518                      if self.has_content_length {
519                        fail!(
520                          UNEXPECTED_TRANSFER_ENCODING,
521                          "Unexpected Transfer-Encoding header when Content-Length header is present"
522                        );
523                      } else if status == 304 || status == 205 || status == 204 || status / 100 == 1 {
524                        fail!(
525                          UNEXPECTED_TRANSFER_ENCODING,
526                          "Unexpected Transfer-Encoding header for a response without body"
527                        );
528                      }
529
530                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
531                        fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
532                      }
533
534                      self.has_transfer_encoding = true;
535
536                      if &data[header_value_start..header_value_end] == b"chunked" {
537                        // If this is true, it means the Transfer-Encoding header was specified more
538                        // than once. This is the second repetition and therefore, the previous one is
539                        // no longer the last one, making it invalid.
540                        if self.has_chunked_transfer_encoding {
541                          fail!(
542                            INVALID_TRANSFER_ENCODING,
543                            "The value \"chunked\" in the Transfer-Encoding header must be the last provided and can \
544                             be provided only once"
545                          );
546                        }
547
548                        self.has_chunked_transfer_encoding = true;
549                      } else {
550                        let mut token_start = header_value_start;
551                        loop {
552                          while token_start < header_value_end && is_ws(data[token_start]) {
553                            token_start += 1;
554                          }
555
556                          if token_start == header_value_end {
557                            break;
558                          }
559
560                          let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
561                            Some(comma) => comma,
562                            None => header_value_end,
563                          };
564                          let mut token_end = token_end_raw;
565
566                          if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
567                            fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
568                          }
569
570                          self.has_transfer_encoding = true;
571
572                          if let case_insensitive_string!("chunked") = data[token_start..token_end] {
573                            // If this is true, it means the Transfer-Encoding header was specified more
574                            // than once. This is the second repetition and therefore, the previous one is
575                            // no longer the last one, making it invalid.
576                            if self.has_chunked_transfer_encoding {
577                              fail!(
578                                INVALID_TRANSFER_ENCODING,
579                                "The value \"chunked\" in the Transfer-Encoding header must be the last provided and \
580                                 can be provided only once"
581                              );
582                            }
583
584                            self.has_chunked_transfer_encoding = true;
585                          } else {
586                            if self.has_chunked_transfer_encoding {
587                              // Any other value when chunked was already specified is invalid as the previous
588                              // chunked would not be the last one anymore
589                              fail!(
590                                INVALID_TRANSFER_ENCODING,
591                                "The value \"chunked\" in the Transfer-Encoding header must be the last provided"
592                              );
593                            }
594                          }
595
596                          if token_end_raw == header_value_end {
597                            break;
598                          } else {
599                            token_start = token_end_raw + 1;
600                          }
601                        }
602                      }
603                    }
604                    // RFC 9112 section 9.6
605                    (10, case_insensitive_string!("connection")) => {
606                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
607                        fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
608                      }
609
610                      match data[header_value_start..header_value_end] {
611                        case_insensitive_string!("close") => {
612                          self.has_connection_close = true;
613                        }
614                        case_insensitive_string!("keep-alive") => {
615                          // Keep-alive is implicit unless Connection: close is
616                          // present.
617                        }
618                        case_insensitive_string!("upgrade") => {
619                          self.has_connection_upgrade = true;
620                        }
621                        _ => {
622                          // Comma separated values
623                          let mut token_start = header_value_start;
624                          loop {
625                            while token_start < header_value_end && is_ws(data[token_start]) {
626                              token_start += 1;
627                            }
628
629                            if token_start == header_value_end {
630                              break;
631                            }
632
633                            let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
634                              Some(comma) => comma,
635                              None => header_value_end,
636                            };
637                            let mut token_end = token_end_raw;
638
639                            if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
640                              fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
641                            }
642
643                            match data[token_start..token_end] {
644                              case_insensitive_string!("close") => {
645                                self.has_connection_close = true;
646                              }
647                              case_insensitive_string!("upgrade") => {
648                                self.has_connection_upgrade = true;
649                              }
650                              case_insensitive_string!("keep-alive") => {}
651                              _ => {
652                                if !validate_token(data, token_start, token_end) {
653                                  fail!(UNEXPECTED_CHARACTER, "Invalid Connection header value");
654                                }
655                              }
656                            }
657
658                            if token_end_raw == header_value_end {
659                              break;
660                            } else {
661                              token_start = token_end_raw + 1;
662                            }
663                          }
664                        }
665                      }
666                    }
667                    (7, case_insensitive_string!("trailer")) => {
668                      self.has_trailers = true;
669
670                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
671                        fail!(UNEXPECTED_CHARACTER, "Expected Trailer header value");
672                      }
673                    }
674                    (7, case_insensitive_string!("upgrade")) => {
675                      if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
676                        fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
677                      }
678
679                      let mut token_start = header_value_start;
680                      loop {
681                        while token_start < header_value_end && is_ws(data[token_start]) {
682                          token_start += 1;
683                        }
684
685                        if token_start == header_value_end {
686                          break;
687                        }
688
689                        let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
690                          Some(comma) => comma,
691                          None => header_value_end,
692                        };
693                        let mut token_end = token_end_raw;
694
695                        if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
696                          fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
697                        }
698
699                        let protocol_name_end = find_char(data, token_start, token_end, b'/').unwrap_or(token_end);
700                        if !validate_token(data, token_start, protocol_name_end) {
701                          fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
702                        }
703
704                        if protocol_name_end < token_end {
705                          let protocol_version_start = protocol_name_end + 1;
706                          if find_char(data, protocol_version_start, token_end, b'/').is_some()
707                            || !validate_token(data, protocol_version_start, token_end)
708                          {
709                            fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
710                          }
711                        }
712
713                        if token_end_raw == header_value_end {
714                          break;
715                        } else {
716                          token_start = token_end_raw + 1;
717                        }
718                      }
719
720                      self.has_upgrade = true;
721                    }
722                    _ => {
723                      if !validate_token(data, header_name_start, header_name_end) {
724                        fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
725                      }
726
727                      if has_header_value_callback {
728                        strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
729                      }
730                    }
731                  }
732                }
733
734                if has_header_name_callback {
735                  callback!(on_header_name, header_name_start, header_name_end - header_name_start);
736                }
737
738                if has_header_value_callback {
739                  callback!(
740                    on_header_value,
741                    header_value_start,
742                    header_value_end - header_value_start
743                  );
744                }
745
746                advance!(cr + 2);
747              }
748              HeaderLineScanResult::Invalid(invalid) => {
749                match find_char(data, 0, invalid, b':') {
750                  Some(_) => {
751                    fail!(UNEXPECTED_CHARACTER, "Invalid header field value character");
752                  }
753                  None => {
754                    fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
755                  }
756                }
757              }
758              HeaderLineScanResult::Incomplete => {
759                if available >= self.max_header_length {
760                  fail!(UNEXPECTED_CHARACTER, "Header line too long");
761                } else {
762                  suspend!();
763                }
764              }
765            }
766          }
767
768          // RFC 9110 section 9.3.6 and 7.8 - Headers have finished, check if the
769          // connection must be upgraded or a body is expected.
770          STATE_BODY_DECISION => {
771            if has_active_callbacks {
772              callback!(on_headers);
773            }
774
775            let method = self.method;
776            let status = self.status;
777
778            if self.has_upgrade && !self.has_connection_upgrade {
779              fail!(
780                MISSING_CONNECTION_UPGRADE,
781                "Missing Connection header set to \"upgrade\" when using the Upgrade header"
782              );
783            }
784
785            if self.has_trailers && !self.has_chunked_transfer_encoding {
786              fail!(
787                UNEXPECTED_TRAILERS,
788                "Trailers are not allowed when not using chunked transfer encoding"
789              );
790            } else if self.is_request && (method == METHOD_GET || method == METHOD_HEAD) && self.content_length > 0 {
791              fail!(UNEXPECTED_CONTENT, "Unexpected content for the request (GET or HEAD)");
792            }
793
794            // In case of Connection: Upgrade or a CONNECT method
795            if self.is_connect {
796              // In case of CONNECT method
797              callback!(on_connect);
798              move_to!(tunnel);
799            } else if self.has_upgrade && !self.is_request && status == 101 {
800              callback!(on_upgrade);
801              move_to!(tunnel);
802            } else if self.is_request {
803              if self.has_transfer_encoding && !self.has_chunked_transfer_encoding {
804                fail!(
805                  UNEXPECTED_CONTENT_LENGTH,
806                  "Transfer-Encoding last header value must be \"chunked\" if the header is present"
807                );
808              } else if self.skip_body {
809                self.continue_without_data = true;
810                self.complete(0);
811              } else if self.has_content_length {
812                // RFC 9110 section 6.3
813                if self.content_length == 0 {
814                  self.continue_without_data = true;
815                  self.complete(0);
816                } else {
817                  move_to!(body_via_content_length);
818                }
819              } else if !self.has_chunked_transfer_encoding {
820                self.continue_without_data = true;
821                self.complete(0);
822              } else {
823                move_to!(chunk_header);
824              }
825            } else {
826              // Response
827              // RFC 9110 section 15.4.5
828              if self.skip_body || (status < 200 && status != 101) || status == 204 || status == 205 || status == 304 {
829                self.continue_without_data = true;
830                self.complete(0);
831              } else if self.has_content_length {
832                if self.content_length == 0 {
833                  self.continue_without_data = true;
834                  self.complete(0);
835                } else {
836                  move_to!(body_via_content_length);
837                }
838              } else if self.has_chunked_transfer_encoding {
839                move_to!(chunk_header);
840              } else {
841                move_to!(body_with_no_length);
842              }
843            }
844          }
845
846          // RFC 9112 section 6.2
847          STATE_BODY_VIA_CONTENT_LENGTH => {
848            let expected = self.remaining_content_length;
849            let available_64 = available as u64;
850
851            // Less data than what it is expected
852            if available_64 < expected {
853              self.remaining_content_length -= available_64;
854
855              callback!(on_data, 0, available);
856              advance!(available);
857            } else {
858              self.remaining_content_length = 0;
859
860              callback!(on_data, 0, expected as usize);
861              callback!(on_body, expected as usize, 0);
862
863              self.continue_without_data = true;
864
865              advance!(expected as usize);
866              self.complete(expected as usize);
867            }
868          }
869
870          // RFC 9110 section 6.3 - Body with no length nor chunked encoding.
871          // This is only allowed in responses.
872          // Note that on_body can't and will not be called here as there is no way to
873          // know when the response finishes.
874          STATE_BODY_WITH_NO_LENGTH => {
875            callback!(on_data, 0, available);
876            advance!(available);
877          }
878
879          // RFC 9112 section 7.1
880          STATE_CHUNK_HEADER => {
881            match find_cr(data, available) {
882              Some(cr) => {
883                match ensure_valid_line(data, cr, available) {
884                  MatchResult::Continue => {}
885                  MatchResult::Suspend => {
886                    suspend!();
887                  }
888                  MatchResult::Stop => {
889                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
890                  }
891                }
892
893                let chunk_length_start = 0;
894                // Note, the character is optional since chunk extensions are not required
895                let chunk_length_end = match find_char(data, chunk_length_start, cr, b';') {
896                  Some(index) => index,
897                  None => cr,
898                };
899
900                if chunk_length_end == 0 {
901                  fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
902                } else if chunk_length_end - chunk_length_start > 16 {
903                  fail!(INVALID_CHUNK_SIZE, "Invalid chunk length size");
904                }
905
906                let mut i = chunk_length_start;
907                let mut chunk_length = 0u64;
908                while i < chunk_length_end {
909                  let b = data[i];
910
911                  let hex = if b.is_ascii_digit() {
912                    (b - b'0') as u64
913                  } else if (b'a'..=b'f').contains(&b) {
914                    (b - b'a' + 10) as u64
915                  } else if (b'A'..=b'F').contains(&b) {
916                    (b - b'A' + 10) as u64
917                  } else {
918                    fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
919                  };
920
921                  chunk_length = chunk_length * 16 + hex;
922                  i += 1;
923                }
924
925                self.chunk_size = chunk_length;
926                self.remaining_chunk_size = chunk_length;
927
928                callback!(
929                  on_chunk_length,
930                  chunk_length_start,
931                  chunk_length_end - chunk_length_start
932                );
933
934                // There are extensions
935                if chunk_length_end < cr {
936                  advance!(chunk_length_end + 1);
937                  move_to!(chunk_extensions);
938                } else {
939                  self.continue_without_data = true;
940                  advance!(cr + 2);
941
942                  if self.chunk_size == 0 {
943                    callback!(on_chunk, 3, 0);
944                    callback!(on_body, 3, 0);
945                    move_to!(trailer);
946                  } else {
947                    move_to!(chunk_data);
948                  }
949                }
950              }
951              None => {
952                if available >= self.max_header_length {
953                  fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
954                } else {
955                  suspend!();
956                }
957              }
958            }
959          }
960
961          STATE_CHUNK_EXTENSIONS => {
962            match find_cr(data, available) {
963              Some(cr) => {
964                match ensure_valid_line(data, cr, available) {
965                  MatchResult::Continue => {}
966                  MatchResult::Suspend => {
967                    suspend!();
968                  }
969                  MatchResult::Stop => {
970                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
971                  }
972                }
973
974                let mut name_start = 0;
975                // Find the first between = or ;
976                let name_end_raw = find_char2(data, name_start, cr, b'=', b';').unwrap_or(cr);
977                let mut name_end = name_end_raw;
978
979                if !strip_ows(data, &mut name_start, &mut name_end, false) {
980                  fail!(UNEXPECTED_CHARACTER, "Expected chunk extension name");
981                }
982
983                if !validate_token(data, name_start, name_end) {
984                  fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension name character");
985                }
986
987                // No value
988                if name_end == cr || data[name_end_raw] == b';' {
989                  callback!(on_chunk_extension_name, name_start, name_end - name_start);
990
991                  if name_end_raw == cr {
992                    advance!(cr + 2);
993
994                    if self.chunk_size == 0 {
995                      callback!(on_body);
996                      move_to!(trailer);
997                    } else {
998                      move_to!(chunk_data);
999                    }
1000                  } else {
1001                    advance!(name_end_raw + 1);
1002                    move_to!(chunk_extensions);
1003                  }
1004                } else {
1005                  // Get the value
1006                  let mut value_start = name_end_raw + 1;
1007                  let mut value_end: usize;
1008                  let next_extension: usize;
1009
1010                  // Strip OWS before the value
1011                  while value_start < cr && is_ws(data[value_start]) {
1012                    value_start += 1;
1013                  }
1014
1015                  if value_start == cr {
1016                    fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1017                  }
1018
1019                  // Quoted string
1020                  // RFC 9110 section 5.6.4
1021                  let mut quoted = false;
1022                  let quote_start = value_start;
1023                  if data[value_start] == b'"' {
1024                    quoted = true;
1025                    value_start += 1;
1026                    let mut quote_start = value_start;
1027
1028                    loop {
1029                      match find_char(data, quote_start, cr, b'"') {
1030                        Some(index) => {
1031                          // Count consecutive backslashes immediately before the quote
1032                          let mut backslash_count = 0usize;
1033                          let mut i = index;
1034
1035                          while i > quote_start && data[i - 1] == b'\\' {
1036                            backslash_count += 1;
1037                            i -= 1;
1038                          }
1039
1040                          if backslash_count.is_multiple_of(2) {
1041                            // quote is not escaped
1042                            value_end = index;
1043                            break;
1044                          } else {
1045                            // quote is escaped, continue searching after it
1046                            quote_start = index + 1;
1047                          }
1048                        }
1049                        None => {
1050                          fail!(UNEXPECTED_CHARACTER, "Expected closing quote for chunk extension value");
1051                        }
1052                      };
1053                    }
1054
1055                    if !validate_quoted_string(data, value_start, value_end) {
1056                      fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension quoted value character");
1057                    }
1058
1059                    next_extension = value_end + 1;
1060                  } else {
1061                    value_end = find_char(data, value_start, cr, b';').unwrap_or(cr);
1062                    next_extension = if value_end == cr { cr } else { value_end };
1063
1064                    if !strip_ows(data, &mut value_start, &mut value_end, false) {
1065                      fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1066                    }
1067
1068                    if value_start != value_end && !validate_token(data, value_start, value_end) {
1069                      fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension value character");
1070                    }
1071                  }
1072
1073                  callback!(on_chunk_extension_name, name_start, name_end - name_start);
1074
1075                  if quoted {
1076                    callback!(on_chunk_extension_value, quote_start, value_end - quote_start + 1);
1077                  } else {
1078                    callback!(on_chunk_extension_value, value_start, value_end - value_start);
1079                  }
1080
1081                  let next_semicolon = find_char(data, next_extension, cr, b';').unwrap_or(cr);
1082
1083                  let mut i = next_extension;
1084                  while i < next_semicolon {
1085                    if !is_ws(data[i]) {
1086                      fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension character after value");
1087                    }
1088                    i += 1;
1089                  }
1090
1091                  if next_semicolon < cr {
1092                    advance!(next_semicolon + 1);
1093                  } else {
1094                    advance!(cr + 2);
1095
1096                    if self.chunk_size == 0 {
1097                      callback!(on_body);
1098                      move_to!(trailer);
1099                    } else {
1100                      move_to!(chunk_data);
1101                    }
1102                  }
1103                }
1104              }
1105              None => {
1106                // Given in chunk_header we already validated this, this should not happen.
1107                if available >= self.max_header_length {
1108                  fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
1109                } else {
1110                  suspend!();
1111                }
1112              }
1113            }
1114          }
1115
1116          STATE_CHUNK_DATA => {
1117            let expected = self.remaining_chunk_size;
1118            let available_64 = available as u64;
1119
1120            // No more data for this chunk, just wait for the CRLF
1121            if expected == 0 {
1122              if available < 2 {
1123                suspend!();
1124              } else if data[0] != b'\r' || data[1] != b'\n' {
1125                fail!(UNEXPECTED_CHARACTER, "Expected CRLF after chunk data");
1126              } else {
1127                advance!(2);
1128                move_to!(chunk_header);
1129              }
1130            } else if available_64 < expected {
1131              // Less data than what it is expected for this chunk
1132              self.remaining_chunk_size -= available_64;
1133
1134              callback!(on_chunk, 0, available);
1135              callback!(on_data, 0, available);
1136
1137              advance!(available);
1138            } else {
1139              self.remaining_chunk_size = 0;
1140
1141              callback!(on_chunk, 0, expected as usize);
1142              callback!(on_data, 0, expected as usize);
1143
1144              advance!(expected as usize);
1145            }
1146          }
1147
1148          // RFC 9112 section 7.1.2
1149          STATE_TRAILER => {
1150            match find_header_line_end(data.as_ptr(), available) {
1151              HeaderLineScanResult::Cr(cr) => {
1152                match ensure_valid_line(data, cr, available) {
1153                  MatchResult::Continue => {}
1154                  MatchResult::Suspend => {
1155                    suspend!();
1156                  }
1157                  MatchResult::Stop => {
1158                    fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
1159                  }
1160                }
1161
1162                // No more trailers or no trailers at all, message completed
1163                if cr == 0 {
1164                  callback!(on_trailers, 2, 0);
1165                  self.continue_without_data = true;
1166                  advance!(2);
1167                  self.complete(2);
1168                  next!();
1169                }
1170
1171                let trailer_name_start = 0;
1172                let trailer_name_end = match find_char(data, trailer_name_start, cr, b':') {
1173                  Some(index) if index > trailer_name_start => index,
1174                  _ => {
1175                    fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1176                  }
1177                };
1178
1179                let mut trailer_value_start = trailer_name_end + 1;
1180                let mut trailer_value_end = cr;
1181                if has_trailer_value_callback {
1182                  strip_ows_fast(data, &mut trailer_value_start, &mut trailer_value_end, true);
1183                }
1184
1185                // Validate
1186                if !validate_token(data, trailer_name_start, trailer_name_end) {
1187                  fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1188                }
1189
1190                if has_trailer_name_callback {
1191                  callback!(
1192                    on_trailer_name,
1193                    trailer_name_start,
1194                    trailer_name_end - trailer_name_start
1195                  );
1196                }
1197
1198                if has_trailer_value_callback {
1199                  callback!(
1200                    on_trailer_value,
1201                    trailer_value_start,
1202                    trailer_value_end - trailer_value_start
1203                  );
1204                }
1205                advance!(cr + 2);
1206              }
1207              HeaderLineScanResult::Invalid(invalid) => {
1208                match find_char(data, 0, invalid, b':') {
1209                  Some(_) => {
1210                    fail!(UNEXPECTED_CHARACTER, "Invalid trailer field value character");
1211                  }
1212                  None => {
1213                    fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1214                  }
1215                }
1216              }
1217              HeaderLineScanResult::Incomplete => {
1218                if available >= self.max_header_length {
1219                  fail!(UNEXPECTED_CHARACTER, "Trailer line too long");
1220                } else {
1221                  suspend!();
1222                }
1223              }
1224            }
1225          }
1226
1227          // Return PAUSE makes this method idempotent without failing - In this state
1228          // all data is ignored since the connection is not in HTTP anymore
1229          STATE_TUNNEL => {
1230            suspend!();
1231          }
1232
1233          _ => {
1234            fail!(UNEXPECTED_STATE, "Invalid state");
1235          }
1236        }
1237      }
1238
1239      // Update the parser position
1240      if advanced > 0 {
1241        self.position += advanced;
1242        data = &data[advanced..];
1243        available -= advanced;
1244
1245        #[cfg(any(debug_assertions, feature = "debug"))]
1246        if self.debug {
1247          eprintln!(
1248            "[milo_parser::debug] loop before processing: position={}, advanced={}, available={}, \
1249             continue_without_data={}",
1250            self.position, advanced, available, self.continue_without_data
1251          );
1252        }
1253      }
1254
1255      // Notify the status change
1256      #[cfg(any(debug_assertions, feature = "debug"))]
1257      if previous_state != self.state {
1258        callback!(on_state_change);
1259        previous_state = self.state;
1260      }
1261
1262      // Show the duration of the operation
1263      #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1264      if self.debug {
1265        let duration = Instant::now().duration_since(last).as_nanos();
1266
1267        if duration > 0 {
1268          eprintln!(
1269            "[milo_parser::debug] loop iteration ({:?}) completed in {} ns",
1270            self.state_str(),
1271            duration
1272          );
1273        }
1274
1275        last = Instant::now();
1276      }
1277    }
1278
1279    #[cfg(any(debug_assertions, feature = "debug"))]
1280    if self.debug {
1281      eprintln!("[milo_parser::debug] loop exit");
1282    }
1283
1284    let consumed = self.position;
1285    self.parsed += consumed as u64;
1286
1287    if self.manage_unconsumed {
1288      unsafe {
1289        // Drop any previous retained data
1290        if unconsumed_len > 0 {
1291          let _ = from_raw_parts(self.unconsumed, unconsumed_len);
1292        }
1293
1294        // If less bytes were consumed than requested, copy the unconsumed portion in
1295        // the self.for the next iteration
1296        if consumed < limit {
1297          let (ptr, len, _) = data.to_vec().into_raw_parts();
1298
1299          self.unconsumed = ptr;
1300          self.unconsumed_len = len;
1301        } else {
1302          self.unconsumed = ptr::null();
1303          self.unconsumed_len = 0;
1304        }
1305      }
1306    }
1307
1308    #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1309    if self.debug {
1310      let duration = Instant::now().duration_since(start).as_nanos();
1311
1312      if duration > 0 {
1313        eprintln!(
1314          "[milo_parser::debug] parse ({:?}, consumed {} of {}) completed in {} ns",
1315          self.state_str(),
1316          consumed,
1317          limit,
1318          duration
1319        );
1320      }
1321    }
1322
1323    // Return the number of consumed bytes
1324    consumed
1325  }
1326
1327  // RFC 9110 section 6.4.1 - Message completed
1328  #[inline(always)]
1329  fn complete(&mut self, offset: usize) {
1330    if self.active_callbacks != 0 {
1331      callback!(on_message_complete, offset, 0);
1332      callback!(on_reset, offset, 0);
1333    }
1334
1335    self.continue_without_data = false;
1336    self.skip_body = false;
1337
1338    if self.has_upgrade && self.is_request {
1339      move_to!(tunnel);
1340    } else if self.has_connection_close {
1341      if self.active_callbacks != 0 {
1342        callback!(on_finish);
1343      }
1344      move_to!(finish);
1345    } else {
1346      move_to!(start);
1347    }
1348  }
1349}