1#![allow(clippy::not_unsafe_ptr_arg_deref)]
2
3extern crate alloc;
4
5use alloc::vec::Vec;
6use alloc::{boxed::Box, format};
7use core::cell::{Cell, RefCell};
8use core::ffi::{c_char, c_uchar, c_void};
9use core::fmt::Debug;
10use core::ptr;
11use core::str;
12use core::{slice, slice::from_raw_parts};
13use std::string;
14#[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
15use std::time::Instant;
16
17use milo_macros::*;
18
19use crate::Methods::CONNECT;
20use crate::matchers::*;
21use crate::*;
22
23impl Parser {
24 pub fn parse(&mut self, input: *const c_uchar, limit: usize) -> usize {
28 if self.paused {
30 return 0;
31 }
32
33 let input = unsafe { from_raw_parts(input, limit) };
34
35 let mut limit = limit;
38 let aggregate: Vec<c_uchar>;
39 let unconsumed_len = self.unconsumed_len;
40
41 let mut data = if self.manage_unconsumed && unconsumed_len > 0 {
42 unsafe {
43 limit += unconsumed_len;
44 let unconsumed = from_raw_parts(self.unconsumed, unconsumed_len);
45
46 aggregate = [unconsumed, input].concat();
47 &aggregate[..]
48 }
49 } else {
50 input
51 };
52
53 data = &data[..limit];
55 let mut available = data.len();
56
57 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
58 let mut last = Instant::now();
59
60 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
61 let start = Instant::now();
62
63 #[cfg(any(debug_assertions, feature = "debug"))]
64 let mut previous_state = self.state;
65
66 #[cfg(any(debug_assertions, feature = "debug"))]
67 let previous_position = self.position;
68
69 self.position = 0;
72 let mut advanced: usize;
73 let mut parsing = true;
74 let has_active_callbacks = self.active_callbacks != 0;
75 let has_header_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_NAME != 0;
76 let has_header_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_HEADER_VALUE != 0;
77 let has_trailer_name_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_NAME != 0;
78 let has_trailer_value_callback = self.active_callbacks & CALLBACK_ACTIVE_ON_TRAILER_VALUE != 0;
79
80 #[cfg(any(debug_assertions, feature = "debug"))]
81 if self.debug {
82 eprintln!("[milo_parser::debug] loop enter");
83 }
84
85 'parser: while parsing && (!self.paused) && (available != 0 || self.continue_without_data) {
87 #[cfg(any(debug_assertions, feature = "debug"))]
88 if self.debug {
89 eprintln!(
90 "[milo_parser::debug] loop before processing: previous_position={}, position={}, available={}, \
91 continue_without_data={}",
92 previous_position, self.position, available, self.continue_without_data
93 );
94 }
95
96 self.continue_without_data = false;
98 advanced = 0;
99
100 'state: {
101 match self.state {
102 STATE_FINISH => {
104 fail!(UNEXPECTED_CHARACTER, "Unexpected data");
105 }
106
107 STATE_ERROR => {
109 suspend!();
110 }
111
112 STATE_START => {
114 if !self.autodetect && self.is_request {
115 if has_active_callbacks {
116 callback!(on_request);
117 callback!(on_message_start);
118 }
119 move_to!(request_line);
120 } else if !self.autodetect {
121 if has_active_callbacks {
122 callback!(on_response);
123 callback!(on_message_start);
124 }
125 move_to!(status_line);
126 } else if data.len() >= 5 && data[4] == b'/' && data.starts_with(b"HTTP") {
127 self.is_request = false;
128 if has_active_callbacks {
129 callback!(on_response);
130 callback!(on_message_start);
131 }
132 move_to!(status_line);
133 } else if data.len() >= 2 && data.starts_with(b"\r\n") {
134 advance!(2);
136 } else {
137 self.is_request = true;
140 if has_active_callbacks {
141 callback!(on_request);
142 callback!(on_message_start);
143 }
144 move_to!(request_line);
145 }
146 }
147
148 STATE_REQUEST_LINE => {
149 match find_cr(data, available) {
150 Some(cr) => {
152 match ensure_valid_line(data, cr, available) {
153 MatchResult::Continue => {}
154 MatchResult::Suspend => {
155 suspend!();
156 }
157 MatchResult::Stop => {
158 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
159 }
160 }
161
162 if cr == 0 {
164 advance!(2);
165 next!();
166 } else if cr < 14
167 {
169 fail!(UNEXPECTED_CHARACTER, "Request line too short");
170 }
171
172 self.clear();
174
175 let method_start = 0;
177 let method_end = match find_char(data, method_start, cr, b' ') {
178 Some(index) if index > method_start => index,
179 _ => {
180 fail!(UNEXPECTED_CHARACTER, "Expected space after method");
181 }
182 };
183
184 let url_start = method_end + 1;
186 let url_end = match find_char(data, url_start, cr, b' ') {
187 Some(index) if index > url_start => index,
188 _ => {
189 fail!(UNEXPECTED_CHARACTER, "Expected space after URL");
190 }
191 };
192
193 let protocol_start = url_end + 1;
195 let protocol_end = match find_char(data, protocol_start, cr, b'/') {
196 Some(index) if index > protocol_start => index,
197 _ => {
198 fail!(UNEXPECTED_CHARACTER, "Expected / after the protocol name");
199 }
200 };
201
202 let method_slice = &data[method_start..method_end];
203 self.method = match method_slice.len() {
204 3 => {
205 match method_slice {
206 b"GET" => METHOD_GET,
207 b"PUT" => METHOD_PUT,
208 b"PRI" => METHOD_PRI,
209 _ => METHOD_OTHER,
210 }
211 }
212 4 => {
213 match method_slice {
214 b"HEAD" => METHOD_HEAD,
215 b"POST" => METHOD_POST,
216 _ => METHOD_OTHER,
217 }
218 }
219 5 => {
220 match method_slice {
221 b"PATCH" => METHOD_PATCH,
222 b"TRACE" => METHOD_TRACE,
223 _ => METHOD_OTHER,
224 }
225 }
226 6 => {
227 match method_slice {
228 b"DELETE" => METHOD_DELETE,
229 _ => METHOD_OTHER,
230 }
231 }
232 7 => {
233 match method_slice {
234 b"CONNECT" => {
235 self.is_connect = true;
236 METHOD_CONNECT
237 }
238 b"OPTIONS" => METHOD_OPTIONS,
239 _ => METHOD_OTHER,
240 }
241 }
242 _ => METHOD_OTHER,
243 };
244
245 if self.method == METHOD_OTHER && !validate_token(data, method_start, method_end) {
246 fail!(UNEXPECTED_CHARACTER, "Invalid method character");
247 }
248
249 if !validate_url(data, url_start, url_end) {
250 fail!(UNEXPECTED_CHARACTER, "Invalid URL character");
251 }
252
253 let version_start = protocol_end + 1;
254 if cr != protocol_start + 8 {
255 fail!(UNEXPECTED_CHARACTER, "Invalid protocol name");
256 }
257
258 if &data[protocol_start..cr] == b"HTTP/1.1" {
259 if self.method == METHOD_PRI {
260 fail!(UNSUPPORTED_HTTP_VERSION, "PRI is only valid with HTTP/2.0");
261 }
262
263 self.version_major = 1;
264 self.version_minor = 1;
265 } else if &data[protocol_start..cr] == b"HTTP/2.0" {
266 if self.method != METHOD_PRI {
267 fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
268 }
269
270 self.version_major = 2;
271 self.version_minor = 0;
272 } else {
273 fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
274 }
275
276 if has_active_callbacks {
277 callback!(on_method, method_start, method_end - method_start);
278 callback!(on_url, url_start, url_end - url_start);
279 callback!(on_protocol, protocol_start, protocol_end - protocol_start);
280 callback!(on_version, version_start, 3);
281 }
282
283 advance!(cr + 2);
284
285 if self.method == METHOD_PRI {
286 move_to!(http2_preface);
287 } else {
288 move_to!(header);
289 }
290 }
291 None => {
292 if available >= self.max_start_line_length {
293 fail!(UNEXPECTED_CHARACTER, "Request line too long");
294 } else {
295 suspend!();
296 }
297 }
298 }
299 }
300
301 STATE_STATUS_LINE => {
303 match find_cr(data, available) {
304 Some(cr) => {
305 match ensure_valid_line(data, cr, available) {
306 MatchResult::Continue => {}
307 MatchResult::Suspend => {
308 suspend!();
309 }
310 MatchResult::Stop => {
311 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
312 }
313 }
314
315 if cr == 0 {
317 advance!(2);
318 next!();
319 } else if cr < 13
320 {
322 fail!(UNEXPECTED_CHARACTER, "Status line too short");
323 }
324
325 self.clear();
327
328 let protocol_start = 0;
329 let protocol_end = 4;
330 let version_start = protocol_end + 1;
331 let version_end = protocol_start + 8;
332
333 if cr < version_end || data[version_end] != b' ' {
334 fail!(UNEXPECTED_CHARACTER, "Expected space after protocol");
335 }
336
337 match &data[protocol_start..version_end] {
338 b"HTTP/1.1" => {
339 self.version_major = 1;
340 self.version_minor = 1;
341 }
342 [b'H', b'T', b'T', b'P', b'/', ..] => {
343 fail!(UNSUPPORTED_HTTP_VERSION, "Unsupported HTTP version");
344 }
345 _ => {
346 fail!(UNEXPECTED_CHARACTER, "Invalid protocol");
347 }
348 }
349
350 let status_start = version_end + 1;
351 let status_end = version_end + 5;
354 if status_end > cr {
355 fail!(INVALID_STATUS, "Expected HTTP response status");
356 }
357
358 if !is_digit(data[status_start])
359 || !is_digit(data[status_start + 1])
360 || !is_digit(data[status_start + 2])
361 {
362 fail!(INVALID_STATUS, "Invalid HTTP response status");
363 }
364
365 if data[status_start + 3] != b' ' {
366 fail!(INVALID_STATUS, "Expected a space after HTTP response status");
367 }
368
369 let reason_start = status_start + 4;
370 let reason_end = cr;
371 if reason_start != reason_end
372 && unsafe { !validate_token_value(data.as_ptr().add(reason_start), reason_end - reason_start) }
373 {
374 fail!(UNEXPECTED_CHARACTER, "Invalid status reason character");
375 }
376
377 self.status = ((data[status_start] - b'0') as u32) * 100
378 + ((data[status_start + 1] - b'0') as u32) * 10
379 + (data[status_start + 2] - b'0') as u32;
380
381 if has_active_callbacks {
382 callback!(on_protocol, protocol_start, 4);
383 callback!(on_version, version_start, 3);
384 callback!(on_status, status_start, 3);
385 if reason_end > reason_start {
386 callback!(on_reason, reason_start, reason_end - reason_start);
387 }
388 }
389
390 advance!(cr + 2);
391 move_to!(header);
392 }
393 None => {
394 if available >= self.max_start_line_length {
395 fail!(UNEXPECTED_CHARACTER, "Status line too long");
396 } else {
397 suspend!();
398 }
399 }
400 }
401 }
402
403 STATE_HTTP2_PREFACE => {
404 if available < 8 {
405 suspend!();
406 } else if &data[..8] == b"\r\nSM\r\n\r\n" {
407 advance!(8);
408 move_to!(tunnel);
409 } else {
410 fail!(UNEXPECTED_CHARACTER, "Malformed HTTP/2.0 preface");
411 }
412 }
413
414 STATE_HEADER => {
415 match find_header_line_end(data.as_ptr(), available) {
416 HeaderLineScanResult::Cr(cr) => {
417 match ensure_valid_line(data, cr, available) {
418 MatchResult::Continue => {}
419 MatchResult::Suspend => {
420 suspend!();
421 }
422 MatchResult::Stop => {
423 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
424 }
425 }
426
427 if cr == 0 {
429 self.continue_without_data = true;
430 advance!(2);
431 move_to!(body_decision);
432 next!();
433 }
434
435 let header_name_start = 0;
438 let header_name_end = match find_char(data, header_name_start, cr, b':') {
439 Some(index) if index > header_name_start => index,
440 _ => {
441 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
442 }
443 };
444 let mut header_value_start = header_name_end + 1;
445 let mut header_value_end = cr;
446
447 let status = self.status;
448 let first_header_byte = data[header_name_start];
449 if !matches!(first_header_byte, b'c' | b'C' | b't' | b'T' | b'u' | b'U') {
450 if !validate_token(data, header_name_start, header_name_end) {
451 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
452 }
453
454 if has_header_value_callback {
455 strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
456 }
457 } else {
458 let header_name_len = header_name_end - header_name_start;
459 match (header_name_len, &data[header_name_start..header_name_end]) {
460 (14, case_insensitive_string!("content-length")) => {
462 if self.has_transfer_encoding {
463 fail!(
464 UNEXPECTED_CONTENT_LENGTH,
465 "Unexpected Content-Length header when Transfer-Encoding header is present"
466 );
467 } else if status == 205 || status == 204 || status / 100 == 1 {
468 fail!(
469 UNEXPECTED_CONTENT_LENGTH,
470 "Unexpected Content-Length header for a response without body"
471 );
472 } else if self.has_content_length {
473 fail!(INVALID_CONTENT_LENGTH, "Invalid duplicate Content-Length header");
474 }
475
476 if header_value_start < cr && !is_ws(data[cr - 1]) {
477 let value_start = if data[header_value_start] == b' ' {
478 header_value_start + 1
479 } else {
480 header_value_start
481 };
482
483 if value_start < cr && !is_ws(data[value_start]) {
484 header_value_start = value_start;
485 } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
486 fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
487 }
488 } else if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
489 fail!(UNEXPECTED_CHARACTER, "Expected Content-Length header value");
490 }
491
492 let mut i = header_value_start;
493 let mut content_length = 0u64;
494
495 if header_value_end - header_value_start > 19 {
496 fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
500 }
501
502 while i < header_value_end {
503 let current = data[i];
504 if !is_digit(current) {
505 fail!(INVALID_CONTENT_LENGTH, "Invalid Content-Length header");
506 }
507
508 content_length = content_length * 10 + (current - b'0') as u64;
509 i += 1;
510 }
511
512 self.has_content_length = true;
513 self.content_length = content_length;
514 self.remaining_content_length = content_length;
515 }
516 (17, case_insensitive_string!("transfer-encoding")) => {
518 if self.has_content_length {
519 fail!(
520 UNEXPECTED_TRANSFER_ENCODING,
521 "Unexpected Transfer-Encoding header when Content-Length header is present"
522 );
523 } else if status == 304 || status == 205 || status == 204 || status / 100 == 1 {
524 fail!(
525 UNEXPECTED_TRANSFER_ENCODING,
526 "Unexpected Transfer-Encoding header for a response without body"
527 );
528 }
529
530 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
531 fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
532 }
533
534 self.has_transfer_encoding = true;
535
536 if &data[header_value_start..header_value_end] == b"chunked" {
537 if self.has_chunked_transfer_encoding {
541 fail!(
542 INVALID_TRANSFER_ENCODING,
543 "The value \"chunked\" in the Transfer-Encoding header must be the last provided and can \
544 be provided only once"
545 );
546 }
547
548 self.has_chunked_transfer_encoding = true;
549 } else {
550 let mut token_start = header_value_start;
551 loop {
552 while token_start < header_value_end && is_ws(data[token_start]) {
553 token_start += 1;
554 }
555
556 if token_start == header_value_end {
557 break;
558 }
559
560 let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
561 Some(comma) => comma,
562 None => header_value_end,
563 };
564 let mut token_end = token_end_raw;
565
566 if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
567 fail!(UNEXPECTED_CHARACTER, "Expected Transfer-Encoding header value");
568 }
569
570 self.has_transfer_encoding = true;
571
572 if let case_insensitive_string!("chunked") = data[token_start..token_end] {
573 if self.has_chunked_transfer_encoding {
577 fail!(
578 INVALID_TRANSFER_ENCODING,
579 "The value \"chunked\" in the Transfer-Encoding header must be the last provided and \
580 can be provided only once"
581 );
582 }
583
584 self.has_chunked_transfer_encoding = true;
585 } else {
586 if self.has_chunked_transfer_encoding {
587 fail!(
590 INVALID_TRANSFER_ENCODING,
591 "The value \"chunked\" in the Transfer-Encoding header must be the last provided"
592 );
593 }
594 }
595
596 if token_end_raw == header_value_end {
597 break;
598 } else {
599 token_start = token_end_raw + 1;
600 }
601 }
602 }
603 }
604 (10, case_insensitive_string!("connection")) => {
606 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
607 fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
608 }
609
610 match data[header_value_start..header_value_end] {
611 case_insensitive_string!("close") => {
612 self.has_connection_close = true;
613 }
614 case_insensitive_string!("keep-alive") => {
615 }
618 case_insensitive_string!("upgrade") => {
619 self.has_connection_upgrade = true;
620 }
621 _ => {
622 let mut token_start = header_value_start;
624 loop {
625 while token_start < header_value_end && is_ws(data[token_start]) {
626 token_start += 1;
627 }
628
629 if token_start == header_value_end {
630 break;
631 }
632
633 let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
634 Some(comma) => comma,
635 None => header_value_end,
636 };
637 let mut token_end = token_end_raw;
638
639 if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
640 fail!(UNEXPECTED_CHARACTER, "Expected Connection header value");
641 }
642
643 match data[token_start..token_end] {
644 case_insensitive_string!("close") => {
645 self.has_connection_close = true;
646 }
647 case_insensitive_string!("upgrade") => {
648 self.has_connection_upgrade = true;
649 }
650 case_insensitive_string!("keep-alive") => {}
651 _ => {
652 if !validate_token(data, token_start, token_end) {
653 fail!(UNEXPECTED_CHARACTER, "Invalid Connection header value");
654 }
655 }
656 }
657
658 if token_end_raw == header_value_end {
659 break;
660 } else {
661 token_start = token_end_raw + 1;
662 }
663 }
664 }
665 }
666 }
667 (7, case_insensitive_string!("trailer")) => {
668 self.has_trailers = true;
669
670 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
671 fail!(UNEXPECTED_CHARACTER, "Expected Trailer header value");
672 }
673 }
674 (7, case_insensitive_string!("upgrade")) => {
675 if !strip_ows_fast(data, &mut header_value_start, &mut header_value_end, false) {
676 fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
677 }
678
679 let mut token_start = header_value_start;
680 loop {
681 while token_start < header_value_end && is_ws(data[token_start]) {
682 token_start += 1;
683 }
684
685 if token_start == header_value_end {
686 break;
687 }
688
689 let token_end_raw = match find_char(data, token_start, header_value_end, b',') {
690 Some(comma) => comma,
691 None => header_value_end,
692 };
693 let mut token_end = token_end_raw;
694
695 if !strip_ows_fast(data, &mut token_start, &mut token_end, false) {
696 fail!(UNEXPECTED_CHARACTER, "Expected Upgrade header value");
697 }
698
699 let protocol_name_end = find_char(data, token_start, token_end, b'/').unwrap_or(token_end);
700 if !validate_token(data, token_start, protocol_name_end) {
701 fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
702 }
703
704 if protocol_name_end < token_end {
705 let protocol_version_start = protocol_name_end + 1;
706 if find_char(data, protocol_version_start, token_end, b'/').is_some()
707 || !validate_token(data, protocol_version_start, token_end)
708 {
709 fail!(UNEXPECTED_CHARACTER, "Invalid Upgrade header value");
710 }
711 }
712
713 if token_end_raw == header_value_end {
714 break;
715 } else {
716 token_start = token_end_raw + 1;
717 }
718 }
719
720 self.has_upgrade = true;
721 }
722 _ => {
723 if !validate_token(data, header_name_start, header_name_end) {
724 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
725 }
726
727 if has_header_value_callback {
728 strip_ows_fast(data, &mut header_value_start, &mut header_value_end, true);
729 }
730 }
731 }
732 }
733
734 if has_header_name_callback {
735 callback!(on_header_name, header_name_start, header_name_end - header_name_start);
736 }
737
738 if has_header_value_callback {
739 callback!(
740 on_header_value,
741 header_value_start,
742 header_value_end - header_value_start
743 );
744 }
745
746 advance!(cr + 2);
747 }
748 HeaderLineScanResult::Invalid(invalid) => {
749 match find_char(data, 0, invalid, b':') {
750 Some(_) => {
751 fail!(UNEXPECTED_CHARACTER, "Invalid header field value character");
752 }
753 None => {
754 fail!(UNEXPECTED_CHARACTER, "Invalid header field name character");
755 }
756 }
757 }
758 HeaderLineScanResult::Incomplete => {
759 if available >= self.max_header_length {
760 fail!(UNEXPECTED_CHARACTER, "Header line too long");
761 } else {
762 suspend!();
763 }
764 }
765 }
766 }
767
768 STATE_BODY_DECISION => {
771 if has_active_callbacks {
772 callback!(on_headers);
773 }
774
775 let method = self.method;
776 let status = self.status;
777
778 if self.has_upgrade && !self.has_connection_upgrade {
779 fail!(
780 MISSING_CONNECTION_UPGRADE,
781 "Missing Connection header set to \"upgrade\" when using the Upgrade header"
782 );
783 }
784
785 if self.has_trailers && !self.has_chunked_transfer_encoding {
786 fail!(
787 UNEXPECTED_TRAILERS,
788 "Trailers are not allowed when not using chunked transfer encoding"
789 );
790 } else if self.is_request && (method == METHOD_GET || method == METHOD_HEAD) && self.content_length > 0 {
791 fail!(UNEXPECTED_CONTENT, "Unexpected content for the request (GET or HEAD)");
792 }
793
794 if self.is_connect {
796 callback!(on_connect);
798 move_to!(tunnel);
799 } else if self.has_upgrade && !self.is_request && status == 101 {
800 callback!(on_upgrade);
801 move_to!(tunnel);
802 } else if self.is_request {
803 if self.has_transfer_encoding && !self.has_chunked_transfer_encoding {
804 fail!(
805 UNEXPECTED_CONTENT_LENGTH,
806 "Transfer-Encoding last header value must be \"chunked\" if the header is present"
807 );
808 } else if self.skip_body {
809 self.continue_without_data = true;
810 self.complete(0);
811 } else if self.has_content_length {
812 if self.content_length == 0 {
814 self.continue_without_data = true;
815 self.complete(0);
816 } else {
817 move_to!(body_via_content_length);
818 }
819 } else if !self.has_chunked_transfer_encoding {
820 self.continue_without_data = true;
821 self.complete(0);
822 } else {
823 move_to!(chunk_header);
824 }
825 } else {
826 if self.skip_body || (status < 200 && status != 101) || status == 204 || status == 205 || status == 304 {
829 self.continue_without_data = true;
830 self.complete(0);
831 } else if self.has_content_length {
832 if self.content_length == 0 {
833 self.continue_without_data = true;
834 self.complete(0);
835 } else {
836 move_to!(body_via_content_length);
837 }
838 } else if self.has_chunked_transfer_encoding {
839 move_to!(chunk_header);
840 } else {
841 move_to!(body_with_no_length);
842 }
843 }
844 }
845
846 STATE_BODY_VIA_CONTENT_LENGTH => {
848 let expected = self.remaining_content_length;
849 let available_64 = available as u64;
850
851 if available_64 < expected {
853 self.remaining_content_length -= available_64;
854
855 callback!(on_data, 0, available);
856 advance!(available);
857 } else {
858 self.remaining_content_length = 0;
859
860 callback!(on_data, 0, expected as usize);
861 callback!(on_body, expected as usize, 0);
862
863 self.continue_without_data = true;
864
865 advance!(expected as usize);
866 self.complete(expected as usize);
867 }
868 }
869
870 STATE_BODY_WITH_NO_LENGTH => {
875 callback!(on_data, 0, available);
876 advance!(available);
877 }
878
879 STATE_CHUNK_HEADER => {
881 match find_cr(data, available) {
882 Some(cr) => {
883 match ensure_valid_line(data, cr, available) {
884 MatchResult::Continue => {}
885 MatchResult::Suspend => {
886 suspend!();
887 }
888 MatchResult::Stop => {
889 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
890 }
891 }
892
893 let chunk_length_start = 0;
894 let chunk_length_end = match find_char(data, chunk_length_start, cr, b';') {
896 Some(index) => index,
897 None => cr,
898 };
899
900 if chunk_length_end == 0 {
901 fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
902 } else if chunk_length_end - chunk_length_start > 16 {
903 fail!(INVALID_CHUNK_SIZE, "Invalid chunk length size");
904 }
905
906 let mut i = chunk_length_start;
907 let mut chunk_length = 0u64;
908 while i < chunk_length_end {
909 let b = data[i];
910
911 let hex = if b.is_ascii_digit() {
912 (b - b'0') as u64
913 } else if (b'a'..=b'f').contains(&b) {
914 (b - b'a' + 10) as u64
915 } else if (b'A'..=b'F').contains(&b) {
916 (b - b'A' + 10) as u64
917 } else {
918 fail!(UNEXPECTED_CHARACTER, "Invalid chunk length character");
919 };
920
921 chunk_length = chunk_length * 16 + hex;
922 i += 1;
923 }
924
925 self.chunk_size = chunk_length;
926 self.remaining_chunk_size = chunk_length;
927
928 callback!(
929 on_chunk_length,
930 chunk_length_start,
931 chunk_length_end - chunk_length_start
932 );
933
934 if chunk_length_end < cr {
936 advance!(chunk_length_end + 1);
937 move_to!(chunk_extensions);
938 } else {
939 self.continue_without_data = true;
940 advance!(cr + 2);
941
942 if self.chunk_size == 0 {
943 callback!(on_chunk, 3, 0);
944 callback!(on_body, 3, 0);
945 move_to!(trailer);
946 } else {
947 move_to!(chunk_data);
948 }
949 }
950 }
951 None => {
952 if available >= self.max_header_length {
953 fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
954 } else {
955 suspend!();
956 }
957 }
958 }
959 }
960
961 STATE_CHUNK_EXTENSIONS => {
962 match find_cr(data, available) {
963 Some(cr) => {
964 match ensure_valid_line(data, cr, available) {
965 MatchResult::Continue => {}
966 MatchResult::Suspend => {
967 suspend!();
968 }
969 MatchResult::Stop => {
970 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
971 }
972 }
973
974 let mut name_start = 0;
975 let name_end_raw = find_char2(data, name_start, cr, b'=', b';').unwrap_or(cr);
977 let mut name_end = name_end_raw;
978
979 if !strip_ows(data, &mut name_start, &mut name_end, false) {
980 fail!(UNEXPECTED_CHARACTER, "Expected chunk extension name");
981 }
982
983 if !validate_token(data, name_start, name_end) {
984 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension name character");
985 }
986
987 if name_end == cr || data[name_end_raw] == b';' {
989 callback!(on_chunk_extension_name, name_start, name_end - name_start);
990
991 if name_end_raw == cr {
992 advance!(cr + 2);
993
994 if self.chunk_size == 0 {
995 callback!(on_body);
996 move_to!(trailer);
997 } else {
998 move_to!(chunk_data);
999 }
1000 } else {
1001 advance!(name_end_raw + 1);
1002 move_to!(chunk_extensions);
1003 }
1004 } else {
1005 let mut value_start = name_end_raw + 1;
1007 let mut value_end: usize;
1008 let next_extension: usize;
1009
1010 while value_start < cr && is_ws(data[value_start]) {
1012 value_start += 1;
1013 }
1014
1015 if value_start == cr {
1016 fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1017 }
1018
1019 let mut quoted = false;
1022 let quote_start = value_start;
1023 if data[value_start] == b'"' {
1024 quoted = true;
1025 value_start += 1;
1026 let mut quote_start = value_start;
1027
1028 loop {
1029 match find_char(data, quote_start, cr, b'"') {
1030 Some(index) => {
1031 let mut backslash_count = 0usize;
1033 let mut i = index;
1034
1035 while i > quote_start && data[i - 1] == b'\\' {
1036 backslash_count += 1;
1037 i -= 1;
1038 }
1039
1040 if backslash_count.is_multiple_of(2) {
1041 value_end = index;
1043 break;
1044 } else {
1045 quote_start = index + 1;
1047 }
1048 }
1049 None => {
1050 fail!(UNEXPECTED_CHARACTER, "Expected closing quote for chunk extension value");
1051 }
1052 };
1053 }
1054
1055 if !validate_quoted_string(data, value_start, value_end) {
1056 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension quoted value character");
1057 }
1058
1059 next_extension = value_end + 1;
1060 } else {
1061 value_end = find_char(data, value_start, cr, b';').unwrap_or(cr);
1062 next_extension = if value_end == cr { cr } else { value_end };
1063
1064 if !strip_ows(data, &mut value_start, &mut value_end, false) {
1065 fail!(UNEXPECTED_CHARACTER, "Expected chunk extension value");
1066 }
1067
1068 if value_start != value_end && !validate_token(data, value_start, value_end) {
1069 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension value character");
1070 }
1071 }
1072
1073 callback!(on_chunk_extension_name, name_start, name_end - name_start);
1074
1075 if quoted {
1076 callback!(on_chunk_extension_value, quote_start, value_end - quote_start + 1);
1077 } else {
1078 callback!(on_chunk_extension_value, value_start, value_end - value_start);
1079 }
1080
1081 let next_semicolon = find_char(data, next_extension, cr, b';').unwrap_or(cr);
1082
1083 let mut i = next_extension;
1084 while i < next_semicolon {
1085 if !is_ws(data[i]) {
1086 fail!(UNEXPECTED_CHARACTER, "Invalid chunk extension character after value");
1087 }
1088 i += 1;
1089 }
1090
1091 if next_semicolon < cr {
1092 advance!(next_semicolon + 1);
1093 } else {
1094 advance!(cr + 2);
1095
1096 if self.chunk_size == 0 {
1097 callback!(on_body);
1098 move_to!(trailer);
1099 } else {
1100 move_to!(chunk_data);
1101 }
1102 }
1103 }
1104 }
1105 None => {
1106 if available >= self.max_header_length {
1108 fail!(UNEXPECTED_CHARACTER, "Chunk header too long");
1109 } else {
1110 suspend!();
1111 }
1112 }
1113 }
1114 }
1115
1116 STATE_CHUNK_DATA => {
1117 let expected = self.remaining_chunk_size;
1118 let available_64 = available as u64;
1119
1120 if expected == 0 {
1122 if available < 2 {
1123 suspend!();
1124 } else if data[0] != b'\r' || data[1] != b'\n' {
1125 fail!(UNEXPECTED_CHARACTER, "Expected CRLF after chunk data");
1126 } else {
1127 advance!(2);
1128 move_to!(chunk_header);
1129 }
1130 } else if available_64 < expected {
1131 self.remaining_chunk_size -= available_64;
1133
1134 callback!(on_chunk, 0, available);
1135 callback!(on_data, 0, available);
1136
1137 advance!(available);
1138 } else {
1139 self.remaining_chunk_size = 0;
1140
1141 callback!(on_chunk, 0, expected as usize);
1142 callback!(on_data, 0, expected as usize);
1143
1144 advance!(expected as usize);
1145 }
1146 }
1147
1148 STATE_TRAILER => {
1150 match find_header_line_end(data.as_ptr(), available) {
1151 HeaderLineScanResult::Cr(cr) => {
1152 match ensure_valid_line(data, cr, available) {
1153 MatchResult::Continue => {}
1154 MatchResult::Suspend => {
1155 suspend!();
1156 }
1157 MatchResult::Stop => {
1158 fail!(UNEXPECTED_CHARACTER, "Expected CRLF");
1159 }
1160 }
1161
1162 if cr == 0 {
1164 callback!(on_trailers, 2, 0);
1165 self.continue_without_data = true;
1166 advance!(2);
1167 self.complete(2);
1168 next!();
1169 }
1170
1171 let trailer_name_start = 0;
1172 let trailer_name_end = match find_char(data, trailer_name_start, cr, b':') {
1173 Some(index) if index > trailer_name_start => index,
1174 _ => {
1175 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1176 }
1177 };
1178
1179 let mut trailer_value_start = trailer_name_end + 1;
1180 let mut trailer_value_end = cr;
1181 if has_trailer_value_callback {
1182 strip_ows_fast(data, &mut trailer_value_start, &mut trailer_value_end, true);
1183 }
1184
1185 if !validate_token(data, trailer_name_start, trailer_name_end) {
1187 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1188 }
1189
1190 if has_trailer_name_callback {
1191 callback!(
1192 on_trailer_name,
1193 trailer_name_start,
1194 trailer_name_end - trailer_name_start
1195 );
1196 }
1197
1198 if has_trailer_value_callback {
1199 callback!(
1200 on_trailer_value,
1201 trailer_value_start,
1202 trailer_value_end - trailer_value_start
1203 );
1204 }
1205 advance!(cr + 2);
1206 }
1207 HeaderLineScanResult::Invalid(invalid) => {
1208 match find_char(data, 0, invalid, b':') {
1209 Some(_) => {
1210 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field value character");
1211 }
1212 None => {
1213 fail!(UNEXPECTED_CHARACTER, "Invalid trailer field name character");
1214 }
1215 }
1216 }
1217 HeaderLineScanResult::Incomplete => {
1218 if available >= self.max_header_length {
1219 fail!(UNEXPECTED_CHARACTER, "Trailer line too long");
1220 } else {
1221 suspend!();
1222 }
1223 }
1224 }
1225 }
1226
1227 STATE_TUNNEL => {
1230 suspend!();
1231 }
1232
1233 _ => {
1234 fail!(UNEXPECTED_STATE, "Invalid state");
1235 }
1236 }
1237 }
1238
1239 if advanced > 0 {
1241 self.position += advanced;
1242 data = &data[advanced..];
1243 available -= advanced;
1244
1245 #[cfg(any(debug_assertions, feature = "debug"))]
1246 if self.debug {
1247 eprintln!(
1248 "[milo_parser::debug] loop before processing: position={}, advanced={}, available={}, \
1249 continue_without_data={}",
1250 self.position, advanced, available, self.continue_without_data
1251 );
1252 }
1253 }
1254
1255 #[cfg(any(debug_assertions, feature = "debug"))]
1257 if previous_state != self.state {
1258 callback!(on_state_change);
1259 previous_state = self.state;
1260 }
1261
1262 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1264 if self.debug {
1265 let duration = Instant::now().duration_since(last).as_nanos();
1266
1267 if duration > 0 {
1268 eprintln!(
1269 "[milo_parser::debug] loop iteration ({:?}) completed in {} ns",
1270 self.state_str(),
1271 duration
1272 );
1273 }
1274
1275 last = Instant::now();
1276 }
1277 }
1278
1279 #[cfg(any(debug_assertions, feature = "debug"))]
1280 if self.debug {
1281 eprintln!("[milo_parser::debug] loop exit");
1282 }
1283
1284 let consumed = self.position;
1285 self.parsed += consumed as u64;
1286
1287 if self.manage_unconsumed {
1288 unsafe {
1289 if unconsumed_len > 0 {
1291 let _ = from_raw_parts(self.unconsumed, unconsumed_len);
1292 }
1293
1294 if consumed < limit {
1297 let (ptr, len, _) = data.to_vec().into_raw_parts();
1298
1299 self.unconsumed = ptr;
1300 self.unconsumed_len = len;
1301 } else {
1302 self.unconsumed = ptr::null();
1303 self.unconsumed_len = 0;
1304 }
1305 }
1306 }
1307
1308 #[cfg(all(not(target_family = "wasm"), any(debug_assertions, feature = "debug")))]
1309 if self.debug {
1310 let duration = Instant::now().duration_since(start).as_nanos();
1311
1312 if duration > 0 {
1313 eprintln!(
1314 "[milo_parser::debug] parse ({:?}, consumed {} of {}) completed in {} ns",
1315 self.state_str(),
1316 consumed,
1317 limit,
1318 duration
1319 );
1320 }
1321 }
1322
1323 consumed
1325 }
1326
1327 #[inline(always)]
1329 fn complete(&mut self, offset: usize) {
1330 if self.active_callbacks != 0 {
1331 callback!(on_message_complete, offset, 0);
1332 callback!(on_reset, offset, 0);
1333 }
1334
1335 self.continue_without_data = false;
1336 self.skip_body = false;
1337
1338 if self.has_upgrade && self.is_request {
1339 move_to!(tunnel);
1340 } else if self.has_connection_close {
1341 if self.active_callbacks != 0 {
1342 callback!(on_finish);
1343 }
1344 move_to!(finish);
1345 } else {
1346 move_to!(start);
1347 }
1348 }
1349}