1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
use std::mem::MaybeUninit;
use crate::ascii::HttpChar;
use crate::bytes::ByteSearch;
use crate::error::ParseErrorKind;
use crate::headers::{Header, HttpVersion, Method, RequestHeader};
use crate::scan::scan_header_line;
use crate::validate::HttpValidate;
use super::Request;
use super::HeaderSlot;
use crate::ascii::parse_content_length;
impl<'buf, const MAX_HDRS: usize> Request<'buf, MAX_HDRS> {
/// Parse raw HTTP header bytes into a `Request`.
///
/// The input **must** contain the `\r\n\r\n` header terminator.
/// No UTF-8 validation is performed — all fields are returned as byte
/// slices. Use [`Request::path_str`] / [`Request::header_str`] to
/// convert to `&str` on demand.
///
/// # Errors
///
/// Returns [`crate::error::Error`] if the request is malformed, the method is
/// unsupported, or the `\r\n\r\n` terminator is missing.
#[inline]
pub fn parse(header_bytes: &'buf [u8]) -> Result<Self, crate::error::Error> {
Self::parse_impl(header_bytes).map_err(crate::error::Error::from)
}
/// Internal parse returning the crate-internal `ParseErrorKind`.
pub(super) fn parse_impl(header_bytes: &'buf [u8]) -> Result<Self, ParseErrorKind> {
// Compile-time guarantee that header indices fit in u8.
const { assert!(MAX_HDRS <= u8::MAX as usize, "MAX_HDRS must be <= 255") }
let (method, path, version, request_line_len) = Self::parse_request_line(header_bytes)?;
// Build the Request in place. `parse_headers` writes through
// `&mut` references to the headers/known slots, avoiding a 1
// KiB stack-to-stack copy that previously showed up at ~8% of
// htmx parse cycles in perf annotation. RVO should place this
// directly into the caller's result slot.
let mut req = Request {
method,
version,
path,
headers: [const { MaybeUninit::uninit() }; MAX_HDRS],
header_count: 0,
known: [HeaderSlot::EMPTY; RequestHeader::COUNT],
content_length: None,
};
Self::parse_headers(
header_bytes,
request_line_len,
&mut req.headers,
&mut req.known,
&mut req.header_count,
)?;
// `headers_init` is the single point at which we reinterpret
// initialized `MaybeUninit<Header>` slots as `Header`. Reuse it
// so the safety argument lives in exactly one place.
let init_headers = req.headers_init();
req.content_length = Self::validate_semantics(method, version, init_headers, &req.known)?;
Ok(req)
}
/// Parse the request line: `METHOD SP request-target SP HTTP/x.y CRLF`.
///
/// Returns `(method, path, version, consumed)`, where `consumed` is the
/// length of the request line including its trailing CRLF, i.e. the
/// offset at which the header lines begin.
///
/// # Errors
///
/// - `NoRequestLine`: `find_crlf` found no line terminator.
/// - `MalformedRequestLine`: fewer than two spaces on the line.
/// - `UnsupportedMethod`: the method token is not recognised.
/// - `MalformedRequestTarget`: the target is empty or fails
///   `is_valid_request_target`.
/// - `UnsupportedHttpVersion`: the bytes after the second space are not
///   a recognised version.
fn parse_request_line(
    header_bytes: &'buf [u8],
) -> Result<(Method, &'buf [u8], HttpVersion, usize), ParseErrorKind> {
    let line_end = header_bytes
        .find_crlf(0)
        .ok_or(ParseErrorKind::NoRequestLine)?;
    let request_line = &header_bytes[..line_end];

    // Both delimiters are located before the method is interpreted, so a
    // line with too few spaces reports `MalformedRequestLine` even when
    // its method token is also unknown.
    let first_space = request_line
        .find_byte(HttpChar::Space.as_u8())
        .ok_or(ParseErrorKind::MalformedRequestLine)?;
    let method_bytes = &request_line[..first_space];
    let rest = &request_line[first_space + 1..];
    let second_space = rest
        .find_byte(HttpChar::Space.as_u8())
        .ok_or(ParseErrorKind::MalformedRequestLine)?;

    let method = Method::from_bytes(method_bytes).ok_or(ParseErrorKind::UnsupportedMethod)?;

    // `path` ends at the FIRST space in `rest`, so it can never contain a
    // space itself; the only whitespace-related failure visible here is an
    // empty target. Irregular spacing is therefore handled as follows:
    // - `GET<SP><SP>/foo HTTP/1.1` (doubled space after the method):
    //   `rest` begins with a space, `second_space == 0`, and the empty
    //   path is rejected right here.
    // - `GET /foo<SP><SP>HTTP/1.1` (doubled space before the version):
    //   the extra space becomes the first byte of the version bytes, which
    //   `HttpVersion::from_bytes` below is expected to refuse.
    // Tolerating either form would let parsers disagree on where the
    // target ends — a classic request-smuggling / routing-inconsistency
    // vector.
    let path = &rest[..second_space];
    if path.is_empty() {
        return Err(ParseErrorKind::MalformedRequestTarget);
    }

    // Reject control characters in the request target (RFC 7230 §3.1.1).
    if !path.is_valid_request_target() {
        return Err(ParseErrorKind::MalformedRequestTarget);
    }

    let version_bytes = &rest[second_space + 1..];
    let version =
        HttpVersion::from_bytes(version_bytes).ok_or(ParseErrorKind::UnsupportedHttpVersion)?;

    // `+ 2` skips the CRLF that terminated the request line.
    Ok((method, path, version, line_end + 2))
}
/// Parses every header line that follows the request line into the
/// caller-provided storage.
///
/// * `header_bytes` — the full request head; `start` is the offset of the
///   first header line within it.
/// * `headers` — destination slots; slots `0..*out_count` are initialized
///   on success.
/// * `known` — lookup table mapping each recognised `RequestHeader` to the
///   index of its first occurrence in `headers`.
/// * `out_count` — receives the number of headers stored; written only on
///   success.
///
/// The outputs are `&mut` parameters rather than return values to avoid a
/// 1 KiB stack-to-stack memcpy: at the default `MAX_HDRS = 32` with a
/// 32-byte `Header`, returning by value cost ~8% of cycles on the htmx
/// benchmark.
///
/// # Errors
///
/// - `TooManyHeaders`: more than `MAX_HDRS` header lines.
/// - `DuplicateHeader`: a repeated Host, Content-Length or
///   Transfer-Encoding header.
/// - `IncompleteHeaders`: the input ended before the empty terminator line.
/// - Whatever `scan_header_line` reports for a malformed line.
fn parse_headers(
    header_bytes: &'buf [u8],
    start: usize,
    headers: &mut [MaybeUninit<Header<'buf>>; MAX_HDRS],
    known: &mut [HeaderSlot; RequestHeader::COUNT],
    out_count: &mut usize,
) -> Result<(), ParseErrorKind> {
    let mut stored: usize = 0;
    let mut cursor = start;

    loop {
        // Running out of input before the blank line means the head is
        // truncated.
        if cursor >= header_bytes.len() {
            return Err(ParseErrorKind::IncompleteHeaders);
        }
        let tail = &header_bytes[cursor..];

        // A bare CRLF at the start of a line terminates the header section.
        let at_terminator = matches!(
            tail,
            [cr, lf, ..] if *cr == HttpChar::CarriageReturn && *lf == HttpChar::LineFeed
        );
        if at_terminator {
            break;
        }

        // One fused pass locates the colon and the CRLF while validating
        // the name (TCHAR) and value bytes.
        //
        // This is also where obs-fold (RFC 7230 §3.2.4, deprecated
        // continuation lines: CRLF followed by SP/HTAB) gets rejected.
        // `scan_header_line` stops at the first `\r\n`, so a folded
        // continuation arrives here as a new "line" that begins with
        // SP/HTAB; neither is a valid TCHAR, so the scanner fails with
        // `MalformedHeader` before reaching any colon. Accepting folds is
        // a request-smuggling vector when intermediaries canonicalize
        // them differently.
        let line = scan_header_line(tail)?;
        let name = &tail[..line.colon];
        let value = tail[line.colon + 1..line.line_end].trim_ows();

        let Some(slot) = headers.get_mut(stored) else {
            return Err(ParseErrorKind::TooManyHeaders);
        };
        slot.write(Header::new(name, value));

        // Record the first occurrence of each known header for O(1)
        // lookup, and refuse repeats of Host, Content-Length and
        // Transfer-Encoding (RFC 7230 §5.4, §3.3.3). A duplicate TE is a
        // request-smuggling vector (RFC 7230 §3.3.3 item 3).
        //
        // RFC 7230 §3.3.2 would allow repeated Content-Length headers
        // whose values are all identical. They are rejected regardless:
        // any tolerated form of CL repetition has historically enabled
        // smuggling when intermediaries disagree on canonicalization.
        if let Some(rh) = RequestHeader::from_bytes_ignore_case(name) {
            // `stored < MAX_HDRS` was established by the capacity check
            // above, and `parse_impl` asserts `MAX_HDRS <= 255` at compile
            // time, so the narrowing cast cannot truncate.
            #[allow(clippy::cast_possible_truncation)]
            let index = stored as u8;

            let entry = &mut known[rh as usize];
            if entry.is_none() {
                *entry = HeaderSlot::new(index);
            } else if matches!(
                rh,
                RequestHeader::Host
                    | RequestHeader::ContentLength
                    | RequestHeader::TransferEncoding
            ) {
                return Err(ParseErrorKind::DuplicateHeader);
            }
        }

        stored += 1;
        // Advance past this line and its CRLF.
        cursor += line.line_end + 2;
    }

    *out_count = stored;
    Ok(())
}
/// Checks the cross-header rules of a parsed request and returns the
/// parsed `Content-Length`, if one was sent.
///
/// `headers` must be the initialized header slice that the indices stored
/// in `known` refer to.
///
/// # Errors
///
/// Checked in this order:
/// - `InvalidContentLength`: Content-Length is present but does not parse.
/// - `UnsupportedTransferEncoding`: Transfer-Encoding is not `chunked`
///   (ASCII case-insensitive), or is present on an HTTP/1.0 request.
/// - `ConflictingHeaders`: Transfer-Encoding together with Content-Length,
///   or Transfer-Encoding on a method that cannot carry a body.
/// - `MissingHostHeader`: an HTTP/1.1 request without Host.
fn validate_semantics(
    method: Method,
    version: HttpVersion,
    headers: &[Header<'buf>],
    known: &[HeaderSlot; RequestHeader::COUNT],
) -> Result<Option<u64>, ParseErrorKind> {
    let te_slot = known[RequestHeader::TransferEncoding as usize];
    let cl_slot = known[RequestHeader::ContentLength as usize];
    let has_te = te_slot.is_some();
    let has_cl = cl_slot.is_some();

    // Content-Length is parsed once here and cached. A value that a proxy
    // could read differently is a request-smuggling vector, so an
    // unparsable value is a hard error rather than "no body".
    let content_length = cl_slot
        .get()
        .map(|idx| {
            parse_content_length(headers[idx as usize].value())
                .ok_or(ParseErrorKind::InvalidContentLength)
        })
        .transpose()?;

    // The only transfer coding accepted is "chunked". RFC 7230 §3.3.3:
    // a transfer coding we do not understand MUST be answered with 501
    // and a close; quietly treating it as chunked would desync us from
    // proxies.
    if let Some(idx) = te_slot.get() {
        let coding = headers[idx as usize].value();
        if !coding.eq_ignore_ascii_case(b"chunked") {
            return Err(ParseErrorKind::UnsupportedTransferEncoding);
        }
    }

    if has_te {
        // RFC 7230 §3.3.1: Transfer-Encoding exists only from HTTP/1.1 on
        // and HTTP/1.0 clients MUST NOT send it. Accepting it on 1.0 is a
        // smuggling vector when intermediaries downgrade-rewrite TE -> CL
        // for 1.0 origins.
        if matches!(version, HttpVersion::Http10) {
            return Err(ParseErrorKind::UnsupportedTransferEncoding);
        }

        // Two conflicts share one error kind:
        // - TE alongside Content-Length (RFC 7230 §3.3.3), the classic
        //   smuggling setup;
        // - TE on a method without body semantics, where a proxy may
        //   forward the TE header while the origin ignores the body.
        if has_cl || !method.can_have_body() {
            return Err(ParseErrorKind::ConflictingHeaders);
        }
    }

    // RFC 7230 §5.4: HTTP/1.1 requests MUST include exactly one Host
    // header. Only its absence is checked here.
    if matches!(version, HttpVersion::Http11) && known[RequestHeader::Host as usize].is_none() {
        return Err(ParseErrorKind::MissingHostHeader);
    }

    Ok(content_length)
}
}