Skip to main content

http_collator/
h1.rs

1//! HTTP/1.x parsing utilities
2
3use h2session::TimestampNs;
4// Re-export HTTP types from h2session for use across all HTTP versions
5pub use h2session::{HttpRequest, HttpResponse};
6use http::{HeaderMap, HeaderName, HeaderValue, Method, StatusCode, Uri};
7
/// Check if data starts with an HTTP/1.x request.
///
/// Returns `true` when `data` begins with one of the standard HTTP method
/// tokens followed by a single space (`GET`, `POST`, `PUT`, `DELETE`, `HEAD`,
/// `OPTIONS`, `PATCH`, `CONNECT`, `TRACE`). Only the prefix is inspected; the
/// rest of the request line is not validated. The match is case-sensitive.
pub fn is_http1_request(data: &[u8]) -> bool {
    // Each entry includes the trailing space so that e.g. `GETX` is rejected.
    const METHOD_PREFIXES: [&[u8]; 9] = [
        b"GET ",
        b"POST ",
        b"PUT ",
        b"DELETE ",
        b"HEAD ",
        b"OPTIONS ",
        b"PATCH ",
        b"CONNECT ",
        b"TRACE ",
    ];

    METHOD_PREFIXES
        .iter()
        .any(|prefix| data.starts_with(prefix))
}
19
/// Check if data starts with an HTTP/1.x response.
///
/// Only the leading version token of the status line is inspected: the data
/// must begin with the exact bytes `HTTP/1.0` or `HTTP/1.1`.
pub fn is_http1_response(data: &[u8]) -> bool {
    let version_prefixes = [b"HTTP/1.0", b"HTTP/1.1"];
    version_prefixes
        .iter()
        .any(|version| data.starts_with(&version[..]))
}
24
25/// Try to parse an HTTP/1.x request, returning Some only if complete.
26/// This combines header parsing and body completeness checking in one pass.
27pub fn try_parse_http1_request(data: &[u8], timestamp_ns: TimestampNs) -> Option<HttpRequest> {
28    try_parse_http1_request_sized(data, timestamp_ns).map(|(req, _)| req)
29}
30
31/// Try to parse an HTTP/1.x request, returning both the parsed message and
32/// the number of bytes consumed from `data` (headers + body). Enables the
33/// caller to drain those bytes and parse any pipelined follow-on request.
34pub fn try_parse_http1_request_sized(
35    data: &[u8],
36    timestamp_ns: TimestampNs,
37) -> Option<(HttpRequest, usize)> {
38    let mut headers = [httparse::EMPTY_HEADER; 64];
39    let mut req = httparse::Request::new(&mut headers);
40
41    let body_offset = match req.parse(data) {
42        Ok(httparse::Status::Complete(len)) => len,
43        _ => return None, // Headers incomplete
44    };
45
46    let body_data = &data[body_offset..];
47    let (body, body_len) = match determine_body(req.headers, body_data, None) {
48        BodyResult::Complete { body, consumed } => (body, consumed),
49        BodyResult::Incomplete => return None,
50    };
51
52    let method = Method::from_bytes(req.method?.as_bytes()).ok()?;
53    let uri: Uri = req.path?.parse().ok()?;
54
55    let mut header_map = HeaderMap::new();
56    for h in req.headers.iter() {
57        let parsed = (
58            HeaderName::from_bytes(h.name.as_bytes()),
59            HeaderValue::from_bytes(h.value),
60        );
61        if let (Ok(name), Ok(value)) = parsed {
62            header_map.append(name, value);
63        }
64    }
65
66    let consumed = body_offset + body_len;
67    Some((
68        HttpRequest {
69            method,
70            uri,
71            headers: header_map,
72            body,
73            timestamp_ns,
74            version: Some(req.version?),
75        },
76        consumed,
77    ))
78}
79
80/// Try to parse an HTTP/1.x response, returning Some only if complete.
81/// This combines header parsing and body completeness checking in one pass.
82pub fn try_parse_http1_response(data: &[u8], timestamp_ns: TimestampNs) -> Option<HttpResponse> {
83    try_parse_http1_response_sized(data, timestamp_ns).map(|(resp, _)| resp)
84}
85
86/// Try to parse an HTTP/1.x response, returning both the parsed message and
87/// the number of bytes consumed from `data`. Enables the caller to drain
88/// those bytes and parse any pipelined follow-on response.
89pub fn try_parse_http1_response_sized(
90    data: &[u8],
91    timestamp_ns: TimestampNs,
92) -> Option<(HttpResponse, usize)> {
93    let mut headers = [httparse::EMPTY_HEADER; 64];
94    let mut res = httparse::Response::new(&mut headers);
95
96    let body_offset = match res.parse(data) {
97        Ok(httparse::Status::Complete(len)) => len,
98        _ => return None, // Headers incomplete
99    };
100
101    let body_data = &data[body_offset..];
102    let (body, body_len) = match determine_body(res.headers, body_data, res.code) {
103        BodyResult::Complete { body, consumed } => (body, consumed),
104        BodyResult::Incomplete => return None,
105    };
106
107    let status = StatusCode::from_u16(res.code?).ok()?;
108
109    let mut header_map = HeaderMap::new();
110    for h in res.headers.iter() {
111        let parsed = (
112            HeaderName::from_bytes(h.name.as_bytes()),
113            HeaderValue::from_bytes(h.value),
114        );
115        if let (Ok(name), Ok(value)) = parsed {
116            header_map.append(name, value);
117        }
118    }
119
120    let consumed = body_offset + body_len;
121    Some((
122        HttpResponse {
123            status,
124            headers: header_map,
125            body,
126            timestamp_ns,
127            version: Some(res.version?),
128            reason: res.reason.map(String::from),
129        },
130        consumed,
131    ))
132}
133
134/// Finalize an HTTP/1.x response when the connection closes.
135///
136/// For responses without explicit framing (no Content-Length or
137/// Transfer-Encoding), RFC 7230 §3.3.3 says the body is everything until the
138/// connection closes. This function parses the headers and takes all remaining
139/// data as the body.
140pub fn try_finalize_http1_response(data: &[u8], timestamp_ns: TimestampNs) -> Option<HttpResponse> {
141    let mut headers = [httparse::EMPTY_HEADER; 64];
142    let mut res = httparse::Response::new(&mut headers);
143
144    let body_offset = match res.parse(data) {
145        Ok(httparse::Status::Complete(len)) => len,
146        _ => return None,
147    };
148
149    let body = data[body_offset..].to_vec();
150    let status = StatusCode::from_u16(res.code?).ok()?;
151
152    let mut header_map = HeaderMap::new();
153    for h in res.headers.iter() {
154        let parsed = (
155            HeaderName::from_bytes(h.name.as_bytes()),
156            HeaderValue::from_bytes(h.value),
157        );
158        if let (Ok(name), Ok(value)) = parsed {
159            header_map.append(name, value);
160        }
161    }
162
163    Some(HttpResponse {
164        status,
165        headers: header_map,
166        body,
167        timestamp_ns,
168        version: Some(res.version?),
169        reason: res.reason.map(String::from),
170    })
171}
172
/// Result of body determination for an HTTP/1.x message.
///
/// Derives `Debug`, `PartialEq` and `Eq` so results can be logged and compared
/// directly (e.g. with `assert_eq!` in tests).
#[derive(Debug, PartialEq, Eq)]
enum BodyResult {
    /// Body is complete.
    ///
    /// `body` is the decoded payload; `consumed` is the number of raw bytes
    /// from the body-data slice that belong to this message. For chunked
    /// encoding `consumed` differs from `body.len()` because it also counts
    /// chunk-size lines, chunk terminators and trailers.
    Complete { body: Vec<u8>, consumed: usize },
    /// Not enough data is available yet to determine the full body.
    Incomplete,
}
182
183/// Determine the body of an HTTP/1.x message based on headers and available
184/// data.
185///
186/// - Content-Length: body is exactly `body_data[..content_length]`
187/// - Transfer-Encoding: chunked: walks chunk boundaries to decode body
188/// - Neither (request): body is empty (no body expected, e.g., GET requests)
189/// - Neither (response with body-bearing status): incomplete (RFC 7230 §3.3.3
190///   read-until-close semantics)
191///
192/// `response_status`: `None` for requests, `Some(code)` for responses.
193fn determine_body(
194    headers: &[httparse::Header<'_>],
195    body_data: &[u8],
196    response_status: Option<u16>,
197) -> BodyResult {
198    // Look for Content-Length (case-insensitive via httparse)
199    for h in headers.iter() {
200        if h.name.eq_ignore_ascii_case("Content-Length") {
201            if let Ok(len_str) = std::str::from_utf8(h.value)
202                && let Ok(content_length) = len_str.trim().parse::<usize>()
203            {
204                if body_data.len() >= content_length {
205                    return BodyResult::Complete {
206                        body:     body_data[..content_length].to_vec(),
207                        consumed: content_length,
208                    };
209                }
210                return BodyResult::Incomplete;
211            }
212            return BodyResult::Incomplete; // Invalid Content-Length
213        }
214    }
215
216    // Check for Transfer-Encoding: chunked
217    for h in headers.iter() {
218        if h.name.eq_ignore_ascii_case("Transfer-Encoding")
219            && let Ok(value) = std::str::from_utf8(h.value)
220            && value.to_ascii_lowercase().contains("chunked")
221        {
222            return decode_chunked_body(body_data);
223        }
224    }
225
226    // No Content-Length and not chunked
227    match response_status {
228        // Requests have no body by default
229        None => BodyResult::Complete {
230            body:     Vec::new(),
231            consumed: 0,
232        },
233        // 1xx, 204, and 304 responses explicitly have no body (RFC 7230 §3.3.3)
234        Some(code) if (100..200).contains(&code) || code == 204 || code == 304 => {
235            BodyResult::Complete {
236                body:     Vec::new(),
237                consumed: 0,
238            }
239        },
240        // Other responses: body is read until connection close
241        Some(_) => BodyResult::Incomplete,
242    }
243}
244
245/// Walk chunk boundaries to decode a chunked transfer-encoded body.
246///
247/// Chunk format: `[hex-size][;ext=val]\r\n[data]\r\n` terminated by
248/// `0\r\n\r\n`. Returns the decoded body or Incomplete if not enough data.
249fn decode_chunked_body(data: &[u8]) -> BodyResult {
250    let mut decoded = Vec::new();
251    let mut pos = 0;
252
253    loop {
254        // Find the end of the chunk-size line
255        let line_end = match find_crlf(data, pos) {
256            Some(idx) => idx,
257            None => return BodyResult::Incomplete,
258        };
259
260        // Parse hex chunk size (ignore chunk extensions after ';')
261        let size_bytes = &data[pos..line_end];
262        let size_part = match size_bytes.iter().position(|&b| b == b';') {
263            Some(semi_pos) => &size_bytes[..semi_pos],
264            None => size_bytes,
265        };
266        let Ok(size_str) = std::str::from_utf8(size_part) else {
267            return BodyResult::Incomplete; // Non-UTF8 chunk size
268        };
269        let Ok(chunk_size) = usize::from_str_radix(size_str.trim(), 16) else {
270            return BodyResult::Incomplete; // Non-hex chunk size
271        };
272
273        // Advance past the chunk-size line (including \r\n)
274        pos = line_end + 2;
275
276        if chunk_size == 0 {
277            // Terminal chunk: expect trailing \r\n (may also have trailers, but
278            // for simplicity we just need \r\n after the 0-size chunk line)
279            if pos + 2 > data.len() {
280                return BodyResult::Incomplete;
281            }
282            // Verify trailing \r\n
283            if data[pos..pos + 2] != *b"\r\n" {
284                // Could be trailers; scan for the final \r\n\r\n
285                match find_crlf_crlf(data, pos) {
286                    Some(trailer_start) => {
287                        return BodyResult::Complete {
288                            body:     decoded,
289                            consumed: trailer_start + 4,
290                        };
291                    },
292                    None => return BodyResult::Incomplete,
293                }
294            }
295            return BodyResult::Complete {
296                body:     decoded,
297                consumed: pos + 2,
298            };
299        }
300
301        // Read chunk data
302        if pos + chunk_size > data.len() {
303            return BodyResult::Incomplete;
304        }
305        decoded.extend_from_slice(&data[pos..pos + chunk_size]);
306        pos += chunk_size;
307
308        // Expect \r\n after chunk data
309        if pos + 2 > data.len() {
310            return BodyResult::Incomplete;
311        }
312        if data[pos..pos + 2] != *b"\r\n" {
313            return BodyResult::Incomplete; // Malformed
314        }
315        pos += 2;
316    }
317}
318
/// Locate the first `\r\n` in `data` at or after index `from`.
///
/// Returns the absolute index (relative to the start of `data`) of the `\r`,
/// or `None` if no CRLF is found or `from` is at/after the end of `data`.
fn find_crlf(data: &[u8], from: usize) -> Option<usize> {
    // `get` yields `None` for `from > len`; for `from == len` the tail is
    // empty and the search below finds nothing.
    let tail = data.get(from..)?;
    let offset = tail.windows(2).position(|pair| pair == b"\r\n")?;
    Some(from + offset)
}
329
/// Locate the first `\r\n\r\n` in `data` at or after index `from`.
///
/// Returns the absolute index (relative to the start of `data`) of the first
/// `\r`, or `None` if the sequence is not found or `from` is at/after the end
/// of `data`.
fn find_crlf_crlf(data: &[u8], from: usize) -> Option<usize> {
    // `get` yields `None` for `from > len`; for `from == len` the tail is
    // empty and the search below finds nothing.
    let tail = data.get(from..)?;
    let offset = tail.windows(4).position(|quad| quad == b"\r\n\r\n")?;
    Some(from + offset)
}
340
341#[cfg(test)]
342mod tests;