1#![allow(clippy::not_unsafe_ptr_arg_deref)]
2
3extern crate alloc;
4
5use alloc::vec::Vec;
6use alloc::{boxed::Box, format};
7use core::cell::{Cell, RefCell};
8use core::ffi::{c_char, c_uchar, c_void};
9use core::fmt::Debug;
10use core::ptr;
11use core::str;
12use core::{slice, slice::from_raw_parts};
13use std::string;
14#[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
15use std::time::Instant;
16
17use milo_macros::*;
18
19use crate::Methods::CONNECT;
20use crate::matchers::*;
21use crate::*;
22
23impl Parser {
24 pub fn parse(&mut self, input: *const c_uchar, limit: usize) -> usize {
28 if self.paused {
30 return 0;
31 }
32
33 let input = unsafe { from_raw_parts(input, limit) };
34
35 let mut limit = limit;
38 let aggregate: Vec<c_uchar>;
39 let unconsumed_len = self.unconsumed_len;
40
41 let mut data = if self.manage_unconsumed && unconsumed_len > 0 {
42 unsafe {
43 limit += unconsumed_len;
44 let unconsumed = from_raw_parts(self.unconsumed, unconsumed_len);
45
46 aggregate = [unconsumed, input].concat();
49 &aggregate[..]
50 }
51 } else {
52 input
53 };
54
55 data = &data[..limit];
57 let mut available = data.len();
58
59 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
60 let mut last = Instant::now();
61
62 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
63 let start = Instant::now();
64
65 #[cfg(any(debug_assertions, feature = "debug"))]
66 let mut previous_state = self.state;
67
68 #[cfg(any(debug_assertions, feature = "debug"))]
69 let previous_position = self.position;
70
71 self.position = 0;
74 let mut advanced: usize;
75 let mut parsing = true;
76 let has_active_callbacks = self.active_callbacks != 0;
77 let has_header_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_NAME != 0;
78 let has_header_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_VALUE != 0;
79 let has_trailer_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_NAME != 0;
80 let has_trailer_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_VALUE != 0;
81
82 #[cfg(any(debug_assertions, feature = "debug"))]
83 if self.debug {
84 eprintln!("[milo_parser::debug] loop enter");
85 }
86
87 'parser: while parsing && (!self.paused) && (available != 0 || self.continue_without_data) {
89 #[cfg(any(debug_assertions, feature = "debug"))]
90 if self.debug {
91 eprintln!(
92 "[milo_parser::debug] loop before processing: previous_position={}, position={}, available={}, \
93 continue_without_data={}",
94 previous_position, self.position, available, self.continue_without_data
95 );
96 }
97
98 self.continue_without_data = false;
101 advanced = 0;
102
103 'state: {
104 match self.state {
105 STATE_FINISH => {
107 fail!(UNEXPECTED_CHARACTER, "Unexpected data");
108 }
109
110 STATE_ERROR => {
112 suspend!();
113 }
114
115 STATE_START => {
117 if !self.autodetect && self.is_request {
118 if has_active_callbacks {
119 callback!(on_request);
120 callback!(on_message_start);
121 }
122 move_to!(request_line);
123 } else if !self.autodetect {
124 if has_active_callbacks {
125 callback!(on_response);
126 callback!(on_message_start);
127 }
128 move_to!(status_line);
129 } else if data.len() >= 5 && data[4] == b'/' && data.starts_with(b"HTTP") {
130 self.is_request = false;
131 if has_active_callbacks {
132 callback!(on_response);
133 callback!(on_message_start);
134 }
135 move_to!(status_line);
136 } else if data.len() >= 2 && data.starts_with(b"\r\n") {
137 advance!(2);
139 } else {
140 self.is_request = true;
143 if has_active_callbacks {
144 callback!(on_request);
145 callback!(on_message_start);
146 }
147 move_to!(request_line);
148 }
149 }
150
151 STATE_REQUEST_LINE => {
152 match find_cr(data, available) {
153 Some(cr) => {
155 match ensure_valid_line(data, cr, available) {
156 MatchResult::Continue => {}
157 MatchResult::Suspend => {
158 suspend!();
159 }
160 MatchResult::Stop => {
161 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
162 }
163 }
164
165 if cr == 0 {
167 advance!(2);
168 next!();
169 } else if cr < 14
170 {
172 fail!(UNEXPECTED_CHARACTER, "Request line too short");
173 }
174
175 self.clear();
177
178 let method_start = 0;
180 let method_end = match find_char(data, method_start, cr, b' ') {
181 Some(index) if index > method_start => index,
182 _ => {
183 fail!(UNEXPECTED_CHARACTER, "Expected space after method");
184 }
185 };
186
187 let url_start = method_end + 1;
189 let url_end = match find_char(data, url_start, cr, b' ') {
190 Some(index) if index > url_start => index,
191 _ => {
192 fail!(UNEXPECTED_CHARACTER, "Expected space after URL");
193 }
194 };
195
196 let protocol_start = url_end + 1;
198 let protocol_end = match find_char(data, protocol_start, cr, b'/') {
199 Some(index) if index > protocol_start => index,
200 _ => {
201 fail!(UNEXPECTED_CHARACTER, "Expected / after the protocol name");
202 }
203 };
204
205 let method_slice = &data[method_start..method_end];
206 self.method = match method_slice.len() {
207 3 => {
208 match method_slice {
209 b"GET" => METHOD_GET,
210 b"PUT" => METHOD_PUT,
211 b"PRI" => METHOD_PRI,
212 _ => METHOD_OTHER,
213 }
214 }
215 4 => {
216 match method_slice {
217 b"HEAD" => METHOD_HEAD,
218 b"POST" => METHOD_POST,
219 _ => METHOD_OTHER,
220 }
221 }
222 5 => {
223 match method_slice {
224 b"PATCH" => METHOD_PATCH,
225 b"TRACE" => METHOD_TRACE,
226 _ => METHOD_OTHER,
227 }
228 }
229 6 => {
230 match method_slice {
231 b"DELETE" => METHOD_DELETE,
232 _ => METHOD_OTHER,
233 }
234 }
235 7 => {
236 match method_slice {
237 b"CONNECT" => {
238 self.is_connect = true;
239 METHOD_CONNECT
240 }
241 b"OPTIONS" => METHOD_OPTIONS,
242 _ => METHOD_OTHER,
243 }
244 }
245 _ => METHOD_OTHER,
246 };
247
248 if self.method == METHOD_OTHER && !validate_token(data, method_start, method_end) {
249 fail!(UNEXPECTED_CHARACTER, "Invalid method character");
250 }
251
252 if !validate_url(data, url_start, url_end) {
253 fail!(UNEXPECTED_CHARACTER, "Invalid URL character");
254 }
255
256 let version_start = protocol_end + 1;
257 if cr != protocol_start + 8 {
258 fail!(UNEXPECTED_CHARACTER, "Invalid protocol name");
259 }
260
261 if &data[protocol_start..cr] == b"HTTP/1.1" {
262 if self.method == METHOD_PRI {
263 fail!(UNSUPPORTED_HTTP_VERSION, "PRI is only valid with HTTP/2.0");
264 }
265
266 self.version_major = 1;
267 self.version_minor = 1;
268 } else if &data[protocol_start..cr] == b"HTTP/2.0" {
269 if self.method != METHOD_PRI {
270 fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
271 }
272
273 self.version_major = 2;
274 self.version_minor = 0;
275 } else {
276 fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
277 }
278
279 if has_active_callbacks {
280 callback!(on_method, method_start, method_end - method_start);
281 callback!(on_url, url_start, url_end - url_start);
282 callback!(on_protocol, protocol_start, protocol_end - protocol_start);
283 callback!(on_version, version_start, 3);
284 }
285
286 advance!(cr + 2);
287
288 if self.method == METHOD_PRI {
289 move_to!(http2_preface);
290 } else {
291 move_to!(header);
292 }
293 }
294 None => {
295 if available >= self.max_start_line_length {
296 fail!(UNEXPECTED_CHARACTER, "Request line too long");
297 } else {
298 suspend!();
299 }
300 }
301 }
302 }
303
304 STATE_STATUS_LINE => {
306 match find_cr(data, available) {
307 Some(cr) => {
308 match ensure_valid_line(data, cr, available) {
309 MatchResult::Continue => {}
310 MatchResult::Suspend => {
311 suspend!();
312 }
313 MatchResult::Stop => {
314 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
315 }
316 }
317
318 if cr == 0 {
320 advance!(2);
321 next!();
322 } else if cr < 13
323 {
325 fail!(UNEXPECTED_CHARACTER, "Status line too short");
326 }
327
328 self.clear();
330
331 let protocol_start = 0;
332 let protocol_end = 4;
333 let version_start = protocol_end + 1;
334 let version_end = protocol_start + 8;
335
336 if cr < version_end || data[version_end] != b' ' {
337 fail!(UNEXPECTED_CHARACTER, "Expected space after protocol");
338 }
339
340 match &data[protocol_start..version_end] {
341 b"HTTP/1.1" => {
342 self.version_major = 1;
343 self.version_minor = 1;
344 }
345 [b'H', b'T', b'T', b'P', b'/', ..] => {
346 fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
347 }
348 _ => {
349 fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
350 }
351 }
352
353 let status_start = version_end + 1;
354 let status_end = version_end + 5;
357 if status_end > cr {
358 fail!(INVALID_STATUS, "Expected HTTP response status");
359 }
360
361 if !is_digit(data[status_start])
362 || !is_digit(data[status_start + 1])
363 || !is_digit(data[status_start + 2])
364 {
365 fail!(INVALID_STATUS, "Invalid HTTP response status");
366 }
367
368 if data[status_start + 3] != b' ' {
369 fail!(INVALID_STATUS, "Expected a space after HTTP response status");
370 }
371
372 let reason_start = status_start + 4;
373 let reason_end = cr;
374 if reason_start != reason_end
375 && unsafe { !validate_token_value(data.as_ptr().add(reason_start), reason_end - reason_start) }
376 {
377 fail!(UNEXPECTED_CHARACTER, "Invalid status reason character");
378 }
379
380 self.status = ((data[status_start] - b'0') as u32) * 100
381 + ((data[status_start + 1] - b'0') as u32) * 10
382 + (data[status_start + 2] - b'0') as u32;
383
384 if has_active_callbacks {
385 callback!(on_protocol, protocol_start, 4);
386 callback!(on_version, version_start, 3);
387 callback!(on_status, status_start, 3);
388 if reason_end > reason_start {
389 callback!(on_reason, reason_start, reason_end - reason_start);
390 }
391 }
392
393 advance!(cr + 2);
394 move_to!(header);
395 }
396 None => {
397 if available >= self.max_start_line_length {
398 fail!(UNEXPECTED_CHARACTER, "Status line too long");
399 } else {
400 suspend!();
401 }
402 }
403 }
404 }
405
406 STATE_HTTP2_PREFACE => {
407 if available < 8 {
408 suspend!();
409 } else if &data[..8] == b"\r\nSM\r\n\r\n" {
410 advance!(8);
411 move_to!(tunnel);
412 } else {
413 fail!(UNEXPECTED_CHARACTER, "Malformed HTTP/2.0 preface");
414 }
415 }
416
417 STATE_HEADER => {
418 match find_header_line_end(data.as_ptr(), available) {
419 HeaderLineScanResult::Cr(cr) => {
420 match ensure_valid_line(data, cr, available) {
421 MatchResult::Continue => {}
422 MatchResult::Suspend => {
423 suspend!();
424 }
425 MatchResult::Stop => {
426 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
427 }
428 }
429
430 if cr == 0 {
432 self.continue_without_data = true;
433 advance!(2);
434 move_to!(body_decision);
435 next!();
436 }
437
438 let header_name_start = 0;
441 let header_name_end = match find_char(data, header_name_start, cr, b':') {
442 Some(index) if index > header_name_start => index,
443 _ => {
444 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
445 }
446 };
447 let mut header_value_start = header_name_end + 1;
448 let mut header_value_end = cr;
449
450 let status = self.status;
451 let first_header_byte = data[header_name_start];
452 if !matches!(first_header_byte, b'c' | b'C' | b't' | b'T' | b'u' | b'U') {
453 if !validate_token(data, header_name_start, header_name_end) {
454 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
455 }
456
457 if has_header_value_callback {
458 strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
459 }
460 } else {
461 let header_name_len = header_name_end - header_name_start;
462 match (header_name_len, &data[header_name_start..header_name_end]) {
463 (14, case_insensitive_string!("content-length")) => {
465 if self.has_transfer_encoding {
466 fail!(
467 UNEXPECTED_CONTENT_LENGTH,
468 "Unexpected Content-Length header when Transfer-Encoding header is present"
469 );
470 } else if status == 205 || status == 204 || status / 100 == 1 {
471 fail!(
472 UNEXPECTED_CONTENT_LENGTH,
473 "Unexpected Content-Length header for a response without body"
474 );
475 } else if self.has_content_length {
476 fail!(INVALID_CONTENT_LENGTH, "Invalid duplicate Content-Length header");
477 }
478
479 if header_value_start < cr && !is_ws(data[cr - 1]) {
480 let value_start = if data[header_value_start] == b' ' {
481 header_value_start + 1
482 } else {
483 header_value_start
484 };
485
486 if value_start < cr && !is_ws(data[value_start]) {
487 header_value_start = value_start;
488 } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
489 fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
490 }
491 } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
492 fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
493 }
494
495 let mut i = header_value_start;
496 let mut content_length = 0u64;
497
498 if header_value_end - header_value_start > 19 {
499 fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
503 }
504
505 while i < header_value_end {
506 let current = data[i];
507 if !is_digit(current) {
508 fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
509 }
510
511 content_length = content_length * 10 + (current - b'0') as u64;
512 i += 1;
513 }
514
515 self.has_content_length = true;
516 self.content_length = content_length;
517 self.remaining_content_length = content_length;
518 }
519 (17, case_insensitive_string!("transfer-encoding")) => {
521 if self.has_content_length {
522 fail!(
523 UNEXPECTED_TRANSFER_ENCODING,
524 "Unexpected Transfer-Encoding header when Content-Length header is present"
525 );
526 } else if status == 304 || status == 205 || status == 204 || status / 100 == 1 {
527 fail!(
528 UNEXPECTED_TRANSFER_ENCODING,
529 "Unexpected Transfer-Encoding header for a response without body"
530 );
531 }
532
533 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
534 fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
535 }
536
537 self.has_transfer_encoding = true;
538
539 if &data[header_value_start..header_value_end] == b"chunked" {
540 if self.has_chunked_transfer_encoding {
544 fail!(
545 INVALID_TRANSFER_ENCODING,
546 "The value \"chunked\" in the Transfer-Encoding header must be the last provided and can \
547 be provided only once"
548 );
549 }
550
551 self.has_chunked_transfer_encoding = true;
552 } else {
553 let mut token_start = header_value_start;
554 loop {
555 while token_start < header_value_end && is_ws(data[token_start]) {
556 token_start += 1;
557 }
558
559 if token_start == header_value_end {
560 break;
561 }
562
563 let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
564 Some(comma) => comma,
565 None => header_value_end,
566 };
567 let mut token_end = token_end_raw;
568
569 if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
570 fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
571 }
572
573 self.has_transfer_encoding = true;
574
575 if let case_insensitive_string!("chunked") = data[token_start..token_end] {
576 if self.has_chunked_transfer_encoding {
580 fail!(
581 INVALID_TRANSFER_ENCODING,
582 "The value \"chunked\" in the Transfer-Encoding header must be the last provided and \
583 can be provided only once"
584 );
585 }
586
587 self.has_chunked_transfer_encoding = true;
588 } else {
589 if self.has_chunked_transfer_encoding {
590 fail!(
593 INVALID_TRANSFER_ENCODING,
594 "The value \"chunked\" in the Transfer-Encoding header must be the last provided"
595 );
596 }
597 }
598
599 if token_end_raw == header_value_end {
600 break;
601 } else {
602 token_start = token_end_raw + 1;
603 }
604 }
605 }
606 }
607 (10, case_insensitive_string!("connection")) => {
609 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
610 fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
611 }
612
613 match data[header_value_start..header_value_end] {
614 case_insensitive_string!("close") => {
615 self.has_connection_close = true;
616 }
617 case_insensitive_string!("keep-alive") => {
618 }
621 case_insensitive_string!("upgrade") => {
622 self.has_connection_upgrade = true;
623 }
624 _ => {
625 let mut token_start = header_value_start;
627 loop {
628 while token_start < header_value_end && is_ws(data[token_start]) {
629 token_start += 1;
630 }
631
632 if token_start == header_value_end {
633 break;
634 }
635
636 let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
637 Some(comma) => comma,
638 None => header_value_end,
639 };
640 let mut token_end = token_end_raw;
641
642 if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
643 fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
644 }
645
646 match data[token_start..token_end] {
647 case_insensitive_string!("close") => {
648 self.has_connection_close = true;
649 }
650 case_insensitive_string!("upgrade") => {
651 self.has_connection_upgrade = true;
652 }
653 case_insensitive_string!("keep-alive") => {}
654 _ => {
655 if !validate_token(data, token_start, token_end) {
656 fail!(UNEXPECTED_CHARACTER, "Invalid Connection header value");
657 }
658 }
659 }
660
661 if token_end_raw == header_value_end {
662 break;
663 } else {
664 token_start = token_end_raw + 1;
665 }
666 }
667 }
668 }
669 }
670 (7, case_insensitive_string!("trailer")) => {
671 self.has_trailers = true;
672
673 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
674 fail!(UNEXPECTED_CHARACTER, "Expected Trailer header value");
675 }
676 }
677 (7, case_insensitive_string!("upgrade")) => {
678 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
679 fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
680 }
681
682 let mut token_start = header_value_start;
683 loop {
684 while token_start < header_value_end && is_ws(data[token_start]) {
685 token_start += 1;
686 }
687
688 if token_start == header_value_end {
689 break;
690 }
691
692 let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
693 Some(comma) => comma,
694 None => header_value_end,
695 };
696 let mut token_end = token_end_raw;
697
698 if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
699 fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
700 }
701
702 let protocol_name_end = find_char(data, token_start, token_end, b'/').unwrap_or(token_end);
703 if !validate_token(data, token_start, protocol_name_end) {
704 fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
705 }
706
707 if protocol_name_end < token_end {
708 let protocol_version_start = protocol_name_end + 1;
709 if find_char(data, protocol_version_start, token_end, b'/').is_some()
710 || !validate_token(data, protocol_version_start, token_end)
711 {
712 fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
713 }
714 }
715
716 if token_end_raw == header_value_end {
717 break;
718 } else {
719 token_start = token_end_raw + 1;
720 }
721 }
722
723 self.has_upgrade = true;
724 }
725 _ => {
726 if !validate_token(data, header_name_start, header_name_end) {
727 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
728 }
729
730 if has_header_value_callback {
731 strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
732 }
733 }
734 }
735 }
736
737 if has_header_name_callback {
738 callback!(on_header_name, header_name_start, header_name_end - header_name_start);
739 }
740
741 if has_header_value_callback {
742 callback!(
743 on_header_value,
744 header_value_start,
745 header_value_end - header_value_start
746 );
747 }
748
749 advance!(cr + 2);
750 }
751 HeaderLineScanResult::Invalid(invalid) => {
752 match find_char(data, 0, invalid, b':') {
753 Some(_) => {
754 fail!(UNEXPECTED_CHARACTER, "Invalid header field value character");
755 }
756 None => {
757 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
758 }
759 }
760 }
761 HeaderLineScanResult::Incomplete => {
762 if available >= self.max_header_length {
763 fail!(UNEXPECTED_CHARACTER, "Header line too long");
764 } else {
765 suspend!();
766 }
767 }
768 }
769 }
770
771 STATE_BODY_DECISION => {
774 if has_active_callbacks {
775 callback!(on_headers);
776 }
777
778 let method = self.method;
779 let status = self.status;
780
781 if self.has_upgrade && !self.has_connection_upgrade {
782 fail!(
783 MISSING_CONNECTION_UPGRADE,
784 "Missing Connection header set to \"upgrade\" when using the Upgrade header"
785 );
786 }
787
788 if self.has_trailers && !self.has_chunked_transfer_encoding {
789 fail!(
790 UNEXPECTED_TRAILERS,
791 "Trailers are not allowed when not using chunked transfer encoding"
792 );
793 } else if self.is_request && (method == METHOD_GET || method == METHOD_HEAD) && self.content_length > 0 {
794 fail!(UNEXPECTED_CONTENT, "Unexpected content for the request (GET or HEAD)");
795 }
796
797 if self.is_connect {
799 callback!(on_connect);
801 move_to!(tunnel);
802 } else if self.has_upgrade && !self.is_request && status == 101 {
803 callback!(on_upgrade);
804 move_to!(tunnel);
805 } else if self.is_request {
806 if self.has_transfer_encoding && !self.has_chunked_transfer_encoding {
807 fail!(
808 UNEXPECTED_CONTENT_LENGTH,
809 "Transfer-Encoding last header value must be \"chunked\" if the header is present"
810 );
811 } else if self.skip_body {
812 self.continue_without_data = true;
813 self.complete(0);
814 } else if self.has_content_length {
815 if self.content_length == 0 {
817 self.continue_without_data = true;
818 self.complete(0);
819 } else {
820 move_to!(body_via_content_length);
821 }
822 } else if !self.has_chunked_transfer_encoding {
823 self.continue_without_data = true;
824 self.complete(0);
825 } else {
826 move_to!(chunk_header);
827 }
828 } else {
829 if self.skip_body || (status < 200 && status != 101) || status == 204 || status == 205 || status == 304 {
832 self.continue_without_data = true;
833 self.complete(0);
834 } else if self.has_content_length {
835 if self.content_length == 0 {
836 self.continue_without_data = true;
837 self.complete(0);
838 } else {
839 move_to!(body_via_content_length);
840 }
841 } else if self.has_chunked_transfer_encoding {
842 move_to!(chunk_header);
843 } else {
844 move_to!(body_with_no_length);
845 }
846 }
847 }
848
849 STATE_BODY_VIA_CONTENT_LENGTH => {
851 let expected = self.remaining_content_length;
852 let available_64 = available as u64;
853
854 if available_64 < expected {
856 self.remaining_content_length -= available_64;
857
858 callback!(on_data, 0, available);
859 advance!(available);
860 } else {
861 self.remaining_content_length = 0;
862
863 callback!(on_data, 0, expected as usize);
864 callback!(on_body, expected as usize, 0);
865
866 self.continue_without_data = true;
867
868 advance!(expected as usize);
869 self.complete(expected as usize);
870 }
871 }
872
873 STATE_BODY_WITH_NO_LENGTH => {
878 callback!(on_data, 0, available);
879 advance!(available);
880 }
881
882 STATE_CHUNK_HEADER => {
884 match find_cr(data, available) {
885 Some(cr) => {
886 match ensure_valid_line(data, cr, available) {
887 MatchResult::Continue => {}
888 MatchResult::Suspend => {
889 suspend!();
890 }
891 MatchResult::Stop => {
892 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
893 }
894 }
895
896 let chunk_length_start = 0;
897 let chunk_length_end = match find_char(data, chunk_length_start, cr, b';') {
899 Some(index) => index,
900 None => cr,
901 };
902
903 if chunk_length_end == 0 {
904 fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
905 } else if chunk_length_end - chunk_length_start > 16 {
906 fail!(INVALID_CHUNK_SIZE, "Invalid chunk length size");
907 }
908
909 let mut i = chunk_length_start;
910 let mut chunk_length = 0u64;
911 while i < chunk_length_end {
912 let b = data[i];
913
914 let hex = if b.is_ascii_digit() {
915 (b - b'0') as u64
916 } else if (b'a'..=b'f').contains(&b) {
917 (b - b'a' + 10) as u64
918 } else if (b'A'..=b'F').contains(&b) {
919 (b - b'A' + 10) as u64
920 } else {
921 fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
922 };
923
924 chunk_length = chunk_length * 16 + hex;
925 i += 1;
926 }
927
928 self.chunk_size = chunk_length;
929 self.remaining_chunk_size = chunk_length;
930
931 callback!(
932 on_chunk_length,
933 chunk_length_start,
934 chunk_length_end - chunk_length_start
935 );
936
937 if chunk_length_end < cr {
939 advance!(chunk_length_end + 1);
940 move_to!(chunk_extensions);
941 } else {
942 self.continue_without_data = true;
943 advance!(cr + 2);
944
945 if self.chunk_size == 0 {
946 callback!(on_chunk, 3, 0);
947 callback!(on_body, 3, 0);
948 move_to!(trailer);
949 } else {
950 move_to!(chunk_data);
951 }
952 }
953 }
954 None => {
955 if available >= self.max_header_length {
956 fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
957 } else {
958 suspend!();
959 }
960 }
961 }
962 }
963
964 STATE_CHUNK_EXTENSIONS => {
965 match find_cr(data, available) {
966 Some(cr) => {
967 match ensure_valid_line(data, cr, available) {
968 MatchResult::Continue => {}
969 MatchResult::Suspend => {
970 suspend!();
971 }
972 MatchResult::Stop => {
973 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
974 }
975 }
976
977 let mut name_start = 0;
978 let name_end_raw = find_char2(data, name_start, cr, b'=', b';').unwrap_or(cr);
980 let mut name_end = name_end_raw;
981
982 if !strip_ows(data, &mut name_start, &mut name_end, false) {
983 fail!(UNEXPECTED_CHARACTER, "Expected chunk extension name");
984 }
985
986 if !validate_token(data, name_start, name_end) {
987 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension name character");
988 }
989
990 if name_end == cr || data[name_end_raw] == b';' {
992 callback!(on_chunk_extension_name, name_start, name_end - name_start);
993
994 if name_end_raw == cr {
995 advance!(cr + 2);
996
997 if self.chunk_size == 0 {
998 callback!(on_body);
999 move_to!(trailer);
1000 } else {
1001 move_to!(chunk_data);
1002 }
1003 } else {
1004 advance!(name_end_raw + 1);
1005 move_to!(chunk_extensions);
1006 }
1007 } else {
1008 let mut value_start = name_end_raw + 1;
1010 let mut value_end: usize;
1011 let next_extension: usize;
1012
1013 while value_start < cr && is_ws(data[value_start]) {
1015 value_start += 1;
1016 }
1017
1018 if value_start == cr {
1019 fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1020 }
1021
1022 let mut quoted = false;
1025 let quote_start = value_start;
1026 if data[value_start] == b'"' {
1027 quoted = true;
1028 value_start += 1;
1029 let mut quote_start = value_start;
1030
1031 loop {
1032 match find_char(data, quote_start, cr, b'"') {
1033 Some(index) => {
1034 let mut backslash_count = 0usize;
1036 let mut i = index;
1037
1038 while i > quote_start && data[i - 1] == b'\\' {
1039 backslash_count += 1;
1040 i -= 1;
1041 }
1042
1043 if backslash_count.is_multiple_of(2) {
1044 value_end = index;
1046 break;
1047 } else {
1048 quote_start = index + 1;
1050 }
1051 }
1052 None => {
1053 fail!(UNEXPECTED_CHARACTER, "Expected closing quote for chunk extension value");
1054 }
1055 };
1056 }
1057
1058 if !validate_quoted_string(data, value_start, value_end) {
1059 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension quoted value character");
1060 }
1061
1062 next_extension = value_end + 1;
1063 } else {
1064 value_end = find_char(data, value_start, cr, b';').unwrap_or(cr);
1065 next_extension = if value_end == cr { cr } else { value_end };
1066
1067 if !strip_ows(data, &mut value_start, &mut value_end, false) {
1068 fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1069 }
1070
1071 if value_start != value_end && !validate_token(data, value_start, value_end) {
1072 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension value character");
1073 }
1074 }
1075
1076 callback!(on_chunk_extension_name, name_start, name_end - name_start);
1077
1078 if quoted {
1079 callback!(on_chunk_extension_value, quote_start, value_end - quote_start + 1);
1080 } else {
1081 callback!(on_chunk_extension_value, value_start, value_end - value_start);
1082 }
1083
1084 let next_semicolon = find_char(data, next_extension, cr, b';').unwrap_or(cr);
1085
1086 let mut i = next_extension;
1087 while i < next_semicolon {
1088 if !is_ws(data[i]) {
1089 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension character after value");
1090 }
1091 i += 1;
1092 }
1093
1094 if next_semicolon < cr {
1095 advance!(next_semicolon + 1);
1096 } else {
1097 advance!(cr + 2);
1098
1099 if self.chunk_size == 0 {
1100 callback!(on_body);
1101 move_to!(trailer);
1102 } else {
1103 move_to!(chunk_data);
1104 }
1105 }
1106 }
1107 }
1108 None => {
1109 if available >= self.max_header_length {
1111 fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
1112 } else {
1113 suspend!();
1114 }
1115 }
1116 }
1117 }
1118
1119 STATE_CHUNK_DATA => {
1120 let expected = self.remaining_chunk_size;
1121 let available_64 = available as u64;
1122
1123 if expected == 0 {
1125 if available < 2 {
1126 suspend!();
1127 } else if data[0] != b'\r' || data[1] != b'\n' {
1128 fail!(UNEXPECTED_CHARACTER, "Expected CRLF after chunk data");
1129 } else {
1130 advance!(2);
1131 move_to!(chunk_header);
1132 }
1133 } else if available_64 < expected {
1134 self.remaining_chunk_size -= available_64;
1136
1137 callback!(on_chunk, 0, available);
1138 callback!(on_data, 0, available);
1139
1140 advance!(available);
1141 } else {
1142 self.remaining_chunk_size = 0;
1143
1144 callback!(on_chunk, 0, expected as usize);
1145 callback!(on_data, 0, expected as usize);
1146
1147 advance!(expected as usize);
1148 }
1149 }
1150
1151 STATE_TRAILER => {
1153 match find_header_line_end(data.as_ptr(), available) {
1154 HeaderLineScanResult::Cr(cr) => {
1155 match ensure_valid_line(data, cr, available) {
1156 MatchResult::Continue => {}
1157 MatchResult::Suspend => {
1158 suspend!();
1159 }
1160 MatchResult::Stop => {
1161 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
1162 }
1163 }
1164
1165 if cr == 0 {
1167 callback!(on_trailers, 2, 0);
1168 self.continue_without_data = true;
1169 advance!(2);
1170 self.complete(2);
1171 next!();
1172 }
1173
1174 let trailer_name_start = 0;
1175 let trailer_name_end = match find_char(data, trailer_name_start, cr, b':') {
1176 Some(index) if index > trailer_name_start => index,
1177 _ => {
1178 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1179 }
1180 };
1181
1182 let mut trailer_value_start = trailer_name_end + 1;
1183 let mut trailer_value_end = cr;
1184 if has_trailer_value_callback {
1185 strip_ows_fast(data, &mut trailer_value_start, &mut trailer_value_end, true);
1186 }
1187
1188 if !validate_token(data, trailer_name_start, trailer_name_end) {
1190 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1191 }
1192
1193 if has_trailer_name_callback {
1194 callback!(
1195 on_trailer_name,
1196 trailer_name_start,
1197 trailer_name_end - trailer_name_start
1198 );
1199 }
1200
1201 if has_trailer_value_callback {
1202 callback!(
1203 on_trailer_value,
1204 trailer_value_start,
1205 trailer_value_end - trailer_value_start
1206 );
1207 }
1208 advance!(cr + 2);
1209 }
1210 HeaderLineScanResult::Invalid(invalid) => {
1211 match find_char(data, 0, invalid, b':') {
1212 Some(_) => {
1213 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field value character");
1214 }
1215 None => {
1216 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1217 }
1218 }
1219 }
1220 HeaderLineScanResult::Incomplete => {
1221 if available >= self.max_header_length {
1222 fail!(UNEXPECTED_CHARACTER, "Trailer line too long");
1223 } else {
1224 suspend!();
1225 }
1226 }
1227 }
1228 }
1229
1230 STATE_TUNNEL => {
1233 suspend!();
1234 }
1235
1236 _ => {
1237 fail!(UNEXPECTED_STATE, "Invalid state");
1238 }
1239 }
1240 }
1241
1242 if advanced > 0 {
1244 self.position += advanced;
1245 data = &data[advanced..];
1246 available -= advanced;
1247
1248 #[cfg(any(debug_assertions, feature = "debug"))]
1249 if self.debug {
1250 eprintln!(
1251 "[milo_parser::debug] loop before processing: position={}, advanced={}, available={}, \
1252 continue_without_data={}",
1253 self.position, advanced, available, self.continue_without_data
1254 );
1255 }
1256 }
1257
1258 #[cfg(any(debug_assertions, feature = "debug"))]
1260 if previous_state != self.state {
1261 callback!(on_state_change);
1262 previous_state = self.state;
1263 }
1264
1265 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1267 if self.debug {
1268 let duration = Instant::now().duration_since(last).as_nanos();
1269
1270 if duration > 0 {
1271 eprintln!(
1272 "[milo_parser::debug] loop iteration ({:?}) completed in {} ns",
1273 self.state_str(),
1274 duration
1275 );
1276 }
1277
1278 last = Instant::now();
1279 }
1280 }
1281
1282 #[cfg(any(debug_assertions, feature = "debug"))]
1283 if self.debug {
1284 eprintln!("[milo_parser::debug] loop exit");
1285 }
1286
1287 let consumed = self.position;
1288 self.parsed += consumed as u64;
1289
1290 if self.manage_unconsumed {
1291 unsafe {
1292 if unconsumed_len > 0 {
1294 let _ = from_raw_parts(self.unconsumed, unconsumed_len);
1295 }
1296
1297 if consumed < limit {
1300 let (ptr, len, _) = data.to_vec().into_raw_parts();
1301
1302 self.unconsumed = ptr;
1303 self.unconsumed_len = len;
1304 } else {
1305 self.unconsumed = ptr::null();
1306 self.unconsumed_len = 0;
1307 }
1308 }
1309 }
1310
1311 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1312 if self.debug {
1313 let duration = Instant::now().duration_since(start).as_nanos();
1314
1315 if duration > 0 {
1316 eprintln!(
1317 "[milo_parser::debug] parse ({:?}, consumed {} of {}) completed in {} ns",
1318 self.state_str(),
1319 consumed,
1320 limit,
1321 duration
1322 );
1323 }
1324 }
1325
1326 consumed
1328 }
1329
1330 #[inline(always)]
1332 fn complete(&mut self, offset: usize) {
1333 if self.active_callbacks != 0 {
1334 callback!(on_message_complete, offset, 0);
1335 callback!(on_reset, offset, 0);
1336 }
1337
1338 self.continue_without_data = false;
1339 self.skip_body = false;
1340
1341 if self.has_upgrade && self.is_request {
1342 move_to!(tunnel);
1343 } else if self.has_connection_close {
1344 if self.active_callbacks != 0 {
1345 callback!(on_finish);
1346 }
1347 move_to!(finish);
1348 } else {
1349 move_to!(start);
1350 }
1351 }
1352}