mik_sdk/request/
mod.rs

1//! HTTP Request wrapper for `mik_sdk` handlers.
2//!
3//! This module provides the `Request` struct that wraps raw `request-data` from WIT
4//! and provides convenient accessors for path parameters, query strings, headers, and body.
5
6mod parsing;
7
8use parsing::contains_ignore_ascii_case;
9pub use parsing::{DecodeError, url_decode};
10
11use crate::constants::{
12    HEADER_TRACE_ID, MAX_FORM_FIELDS, MAX_HEADER_VALUE_LEN, MAX_TOTAL_HEADERS_SIZE,
13    MAX_URL_DECODED_LEN,
14};
15use crate::json::{self, JsonValue};
16use std::cell::OnceCell;
17use std::collections::HashMap;
18
/// The HTTP request methods supported by the SDK, mirroring the WIT definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Method {
    /// `GET` - fetch a resource.
    Get,
    /// `POST` - create a resource.
    Post,
    /// `PUT` - replace a resource.
    Put,
    /// `PATCH` - apply a partial update to a resource.
    Patch,
    /// `DELETE` - remove a resource.
    Delete,
    /// `HEAD` - fetch headers only.
    Head,
    /// `OPTIONS` - ask which methods are allowed.
    Options,
}
38
39impl Method {
40    /// Returns the method as an uppercase string (e.g., "GET", "POST").
41    #[must_use]
42    pub const fn as_str(&self) -> &'static str {
43        match self {
44            Self::Get => "GET",
45            Self::Post => "POST",
46            Self::Put => "PUT",
47            Self::Patch => "PATCH",
48            Self::Delete => "DELETE",
49            Self::Head => "HEAD",
50            Self::Options => "OPTIONS",
51        }
52    }
53}
54
55impl std::fmt::Display for Method {
56    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57        f.write_str(self.as_str())
58    }
59}
60
61/// HTTP Request wrapper providing convenient access to request data.
62///
63/// Created by the `routes!` macro from raw `request-data`. Provides:
64/// - Path parameters extracted from route patterns (e.g., `/users/{id}`)
65/// - Query string parsing
66/// - Header access (case-insensitive)
67/// - Body access (raw bytes, text, or parsed via external JSON)
68///
69/// # Example
70///
71/// ```ignore
72/// fn get_user(req: &Request) -> Response {
73///     let id = req.param("id").unwrap_or("0");
74///     let page = req.query("page").unwrap_or("1");
75///     let auth = req.header("authorization");
76///     // ...
77/// }
78/// ```
79///
80/// # Implementation Notes
81///
82/// This struct stores headers with an index-based lookup optimization:
83/// - `headers`: Original header pairs for `headers()` iteration
84/// - `header_index`: Maps lowercase keys to indices in `headers` for O(1) lookups
85///
86/// This avoids cloning header values while providing:
87/// - O(1) header lookups via `header()` and `header_all()`
88/// - Original header iteration via `headers()`
89#[non_exhaustive]
90pub struct Request {
91    method: Method,
92    path: String,
93    /// Original headers for iteration. See `headers()`.
94    headers: Vec<(String, String)>,
95    body: Option<Vec<u8>>,
96    /// Path parameters extracted by routes! macro.
97    params: HashMap<String, String>,
98    /// Lazily parsed query parameters (stores all values for each key).
99    ///
100    /// Uses `OnceCell` for lazy initialization - parsing only happens on first
101    /// access via `query()` or `query_all()`. This avoids parsing overhead for
102    /// handlers that don't use query parameters.
103    query_cache: OnceCell<HashMap<String, Vec<String>>>,
104    /// Lazily parsed form body (application/x-www-form-urlencoded).
105    ///
106    /// Uses `OnceCell` for lazy initialization - parsing only happens on first
107    /// access via `form()` or `form_all()`. This avoids parsing overhead for
108    /// handlers that don't read form data.
109    form_cache: OnceCell<HashMap<String, Vec<String>>>,
110    /// Index map for O(1) header lookup (lowercase keys -> indices in headers vec).
111    /// Supports multiple values per header (e.g., Set-Cookie).
112    header_index: HashMap<String, Vec<usize>>,
113}
114
115impl std::fmt::Debug for Request {
116    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117        // Design decision: Omit internal cache fields from Debug output.
118        //
119        // Excluded fields and rationale:
120        // - `query_cache`: Lazy cache, populated on first query() call. Showing it
121        //   would expose implementation details and vary based on access patterns.
122        // - `form_cache`: Same as query_cache - lazy initialization detail.
123        // - `header_index`: Internal O(1) lookup optimization. Users should see
124        //   headers via `headers` field, not the index structure.
125        //
126        // This keeps Debug output focused on the actual request data that handlers
127        // care about, not internal performance optimizations.
128        f.debug_struct("Request")
129            .field("method", &self.method)
130            .field("path", &self.path)
131            .field("headers", &self.headers.len())
132            .field("body", &self.body.as_ref().map(std::vec::Vec::len))
133            .field("params", &self.params)
134            .finish_non_exhaustive()
135    }
136}
137
138impl Request {
139    /// Create a new Request from raw components.
140    ///
141    /// This is called by the `routes!` macro after pattern matching.
142    /// Users don't typically call this directly.
143    #[doc(hidden)]
144    #[must_use]
145    pub fn new(
146        method: Method,
147        path: String,
148        headers: Vec<(String, String)>,
149        body: Option<Vec<u8>>,
150        params: HashMap<String, String>,
151    ) -> Self {
152        // Build index map: lowercase keys -> indices in headers vec
153        // This avoids cloning header values (only lowercase keys are allocated)
154        // Pre-allocate based on header count (most headers have unique names)
155        let mut header_index: HashMap<String, Vec<usize>> = HashMap::with_capacity(headers.len());
156
157        // Track header sizes for security limits
158        let mut total_headers_size: usize = 0;
159        let mut oversized_value_count = 0u32;
160        let mut total_size_exceeded = false;
161
162        for (i, (k, v)) in headers.iter().enumerate() {
163            // Track total headers size (name + value)
164            let header_size = k.len().saturating_add(v.len());
165            total_headers_size = total_headers_size.saturating_add(header_size);
166
167            // Check individual header value size
168            if v.len() > MAX_HEADER_VALUE_LEN {
169                oversized_value_count += 1;
170            }
171
172            // Check total headers size
173            if total_headers_size > MAX_TOTAL_HEADERS_SIZE && !total_size_exceeded {
174                total_size_exceeded = true;
175            }
176
177            header_index.entry(k.to_lowercase()).or_default().push(i);
178        }
179
180        // Log warnings for security limit violations (defense-in-depth)
181        if oversized_value_count > 0 {
182            crate::log_warn!(
183                "Header value size limit exceeded: {} header(s) exceed {} bytes (max: {} bytes)",
184                oversized_value_count,
185                MAX_HEADER_VALUE_LEN,
186                MAX_HEADER_VALUE_LEN
187            );
188        }
189        if total_size_exceeded {
190            crate::log_warn!(
191                "Total headers size limit exceeded: {} bytes (max: {} bytes)",
192                total_headers_size,
193                MAX_TOTAL_HEADERS_SIZE
194            );
195        }
196
197        Self {
198            method,
199            path,
200            headers,
201            body,
202            params,
203            query_cache: OnceCell::new(),
204            form_cache: OnceCell::new(),
205            header_index,
206        }
207    }
208
209    /// HTTP method (GET, POST, etc.).
210    #[inline]
211    pub const fn method(&self) -> Method {
212        self.method
213    }
214
215    /// Full request path including query string (e.g., "/users/123?page=1").
216    #[inline]
217    pub fn path(&self) -> &str {
218        &self.path
219    }
220
221    /// Just the path portion without query string (e.g., "/users/123").
222    #[inline]
223    pub fn path_without_query(&self) -> &str {
224        self.path.split('?').next().unwrap_or(&self.path)
225    }
226
227    /// Get a path parameter extracted from the route pattern.
228    ///
229    /// For route `/users/{id}` matching path `/users/123`, `param("id")` returns `Some("123")`.
230    #[inline]
231    pub fn param(&self, name: &str) -> Option<&str> {
232        self.params.get(name).map(String::as_str)
233    }
234
235    /// Get the first query parameter value from the URL.
236    ///
237    /// For path `/users?page=2&limit=10`, `query("page")` returns `Some("2")`.
238    /// For multiple values with same key, use `query_all()`.
239    pub fn query(&self, name: &str) -> Option<&str> {
240        // Parse query string lazily on first access
241        let cache = self.query_cache.get_or_init(|| self.parse_query());
242        cache.get(name).and_then(|v| v.first()).map(String::as_str)
243    }
244
245    /// Get all query parameter values for a key.
246    ///
247    /// HTTP allows multiple query params with the same name (e.g., `?ids=1&ids=2&ids=3`).
248    /// This returns all values for such parameters.
249    ///
250    /// ```ignore
251    /// // For URL: /search?tag=rust&tag=wasm&tag=http
252    /// let tags = req.query_all("tag");
253    /// assert_eq!(tags, &["rust", "wasm", "http"]);
254    /// ```
255    pub fn query_all(&self, name: &str) -> &[String] {
256        let cache = self.query_cache.get_or_init(|| self.parse_query());
257        cache.get(name).map_or(&[], Vec::as_slice)
258    }
259
260    /// Get the first header value by name (case-insensitive).
261    ///
262    /// Uses pre-normalized `HashMap` for O(1) lookup. Avoids allocation when
263    /// the header name is already lowercase (common case).
264    /// For headers with multiple values (e.g., Set-Cookie), use `header_all()`.
265    ///
266    /// ```ignore
267    /// let content_type = req.header("content-type");
268    /// let auth = req.header("Authorization"); // Same as "authorization"
269    /// ```
270    pub fn header(&self, name: &str) -> Option<&str> {
271        // Fast path: if name is already lowercase, avoid allocation
272        let indices = if name.bytes().all(|b| !b.is_ascii_uppercase()) {
273            self.header_index.get(name)
274        } else {
275            // Slow path: allocate lowercase key for mixed-case lookups
276            self.header_index.get(&name.to_lowercase())
277        };
278
279        indices
280            .and_then(|idx| idx.first())
281            .and_then(|&i| self.headers.get(i))
282            .map(|(_, v)| v.as_str())
283    }
284
285    /// Get the trace ID from the incoming request.
286    ///
287    /// Returns the value of the `x-trace-id` header if present.
288    /// Use this with `ClientRequest::with_trace_id()` to propagate
289    /// trace context to outgoing HTTP calls.
290    ///
291    /// ```ignore
292    /// let response = fetch!(GET "https://api.example.com/data")
293    ///     .with_trace_id(req.trace_id())
294    ///     .send_with(&handler)?;
295    /// ```
296    #[inline]
297    pub fn trace_id(&self) -> Option<&str> {
298        self.header(HEADER_TRACE_ID)
299    }
300
301    /// Get all values for a header (case-insensitive).
302    ///
303    /// HTTP allows multiple headers with the same name (e.g., Set-Cookie, Accept).
304    /// This returns all values for such headers.
305    ///
306    /// ```ignore
307    /// let cookies = req.header_all("set-cookie");
308    /// for cookie in &cookies {
309    ///     println!("Cookie: {}", cookie);
310    /// }
311    /// ```
312    pub fn header_all(&self, name: &str) -> Vec<&str> {
313        // Fast path: if name is already lowercase, avoid allocation
314        let indices = if name.bytes().all(|b| !b.is_ascii_uppercase()) {
315            self.header_index.get(name)
316        } else {
317            // Slow path: allocate lowercase key for mixed-case lookups
318            self.header_index.get(&name.to_lowercase())
319        };
320
321        indices
322            .map(|idx| {
323                idx.iter()
324                    .filter_map(|&i| self.headers.get(i).map(|(_, v)| v.as_str()))
325                    .collect()
326            })
327            .unwrap_or_default()
328    }
329
330    /// Get all headers as name-value pairs.
331    ///
332    /// Returns headers in their original form (before normalization).
333    #[inline]
334    pub fn headers(&self) -> &[(String, String)] {
335        &self.headers
336    }
337
338    /// Raw request body bytes.
339    ///
340    /// Returns the raw bytes of the request body, or `None` if no body was provided.
341    ///
342    /// # Returns
343    ///
344    /// - `Some(&[u8])` - The raw body bytes
345    /// - `None` - No body in request
346    #[inline]
347    #[must_use]
348    pub fn body(&self) -> Option<&[u8]> {
349        self.body.as_deref()
350    }
351
352    /// Request body as UTF-8 text.
353    ///
354    /// # Returns
355    ///
356    /// - `Some(&str)` - Body successfully decoded as UTF-8
357    /// - `None` - No body, or body is not valid UTF-8
358    #[inline]
359    #[must_use]
360    pub fn text(&self) -> Option<&str> {
361        self.body.as_ref().and_then(|b| std::str::from_utf8(b).ok())
362    }
363
364    /// Check if request has a body.
365    #[inline]
366    #[must_use]
367    pub fn has_body(&self) -> bool {
368        self.body.as_ref().is_some_and(|b| !b.is_empty())
369    }
370
371    /// Content-Type header value.
372    ///
373    /// # Returns
374    ///
375    /// - `Some(&str)` - The Content-Type header value
376    /// - `None` - No Content-Type header present
377    #[inline]
378    #[must_use]
379    pub fn content_type(&self) -> Option<&str> {
380        use crate::constants::HEADER_CONTENT_TYPE;
381        self.header(HEADER_CONTENT_TYPE)
382    }
383
384    /// Check if Content-Type is JSON (case-insensitive).
385    #[inline]
386    #[must_use]
387    pub fn is_json(&self) -> bool {
388        use crate::constants::MIME_JSON;
389        self.content_type()
390            .is_some_and(|ct| contains_ignore_ascii_case(ct, MIME_JSON))
391    }
392
393    /// Check if Content-Type is form-urlencoded (case-insensitive).
394    #[inline]
395    #[must_use]
396    pub fn is_form(&self) -> bool {
397        use crate::constants::MIME_FORM_URLENCODED;
398        self.content_type()
399            .is_some_and(|ct| contains_ignore_ascii_case(ct, MIME_FORM_URLENCODED))
400    }
401
402    /// Check if Content-Type is HTML (case-insensitive).
403    #[inline]
404    #[must_use]
405    pub fn is_html(&self) -> bool {
406        use crate::constants::MIME_HTML;
407        self.content_type()
408            .is_some_and(|ct| contains_ignore_ascii_case(ct, MIME_HTML))
409    }
410
411    /// Check if client accepts a content type (via Accept header).
412    ///
413    /// Performs a simple case-insensitive substring match against the Accept header.
414    /// Does not parse q-values; returns `true` if the MIME type is present at all.
415    ///
416    /// ```ignore
417    /// // Accept: text/html, application/json
418    /// req.accepts("json")  // true
419    /// req.accepts("html")  // true
420    /// req.accepts("xml")   // false
421    /// ```
422    pub fn accepts(&self, mime: &str) -> bool {
423        self.header("accept")
424            .is_some_and(|accept| contains_ignore_ascii_case(accept, mime))
425    }
426
427    /// Get the first form field value from a form-urlencoded body.
428    ///
429    /// Parses `application/x-www-form-urlencoded` body data.
430    /// For multiple values with same key, use `form_all()`.
431    ///
432    /// # Returns
433    ///
434    /// - `Some(&str)` - The decoded field value
435    /// - `None` - Field not present, no body, or body is not valid UTF-8
436    ///
437    /// # Examples
438    ///
439    /// ```ignore
440    /// // Body: name=Alice&email=alice%40example.com
441    /// let name = req.form("name");  // Some("Alice")
442    /// let email = req.form("email"); // Some("alice@example.com")
443    /// ```
444    #[must_use]
445    pub fn form(&self, name: &str) -> Option<&str> {
446        self.form_cache()
447            .get(name)
448            .and_then(|v| v.first())
449            .map(String::as_str)
450    }
451
452    /// Get all form field values for a key from a form-urlencoded body.
453    ///
454    /// ```ignore
455    /// // Body: tags=rust&tags=wasm&tags=http
456    /// let tags = req.form_all("tags"); // &["rust", "wasm", "http"]
457    /// ```
458    pub fn form_all(&self, name: &str) -> &[String] {
459        self.form_cache().get(name).map_or(&[], Vec::as_slice)
460    }
461
462    /// Parse request body as JSON using the provided parser.
463    ///
464    /// # Returns
465    ///
466    /// - `Some(T)` - Body successfully parsed by the provided function
467    /// - `None` - No body, or parser returned `None`
468    ///
469    /// # Examples
470    ///
471    /// ```ignore
472    /// let body = req.json_with(json::try_parse)?;
473    /// let name = body.get("name").str_or("");
474    /// ```
475    #[must_use]
476    pub fn json_with<T>(&self, parse: impl FnOnce(&[u8]) -> Option<T>) -> Option<T> {
477        self.body().and_then(parse)
478    }
479
480    /// Parse request body as JSON.
481    ///
482    /// Uses the built-in JSON parser. For custom parsers, use [`json_with`](Self::json_with).
483    ///
484    /// # Returns
485    ///
486    /// - `Some(JsonValue)` - Body successfully parsed as JSON
487    /// - `None` - No body, or body is not valid JSON
488    ///
489    /// # Examples
490    ///
491    /// ```ignore
492    /// let body = req.json()?;
493    /// let name = body.path_str(&["user", "name"]).unwrap_or("anonymous");
494    /// ```
495    #[must_use]
496    pub fn json(&self) -> Option<JsonValue> {
497        self.json_with(json::try_parse)
498    }
499
500    // --- Private helpers ---
501
502    fn form_cache(&self) -> &HashMap<String, Vec<String>> {
503        self.form_cache.get_or_init(|| self.parse_form())
504    }
505
506    fn parse_form(&self) -> HashMap<String, Vec<String>> {
507        let mut map: HashMap<String, Vec<String>> = HashMap::new();
508
509        if let Some(body) = self.text() {
510            let mut truncated = false;
511            let mut decode_failures = 0u32;
512            for pair in body.split('&') {
513                // Defense-in-depth: limit number of form fields
514                if map.len() >= MAX_FORM_FIELDS {
515                    truncated = true;
516                    break;
517                }
518
519                if let Some((key, value)) = pair.split_once('=') {
520                    match (url_decode(key), url_decode(value)) {
521                        (Ok(decoded_key), Ok(decoded_value)) => {
522                            map.entry(decoded_key).or_default().push(decoded_value);
523                        },
524                        _ => {
525                            decode_failures += 1;
526                        },
527                    }
528                } else if !pair.is_empty() {
529                    match url_decode(pair) {
530                        Ok(decoded_key) => {
531                            map.entry(decoded_key).or_default().push(String::new());
532                        },
533                        Err(_) => {
534                            decode_failures += 1;
535                        },
536                    }
537                }
538            }
539
540            if truncated {
541                crate::log_warn!(
542                    "Form field limit exceeded: dropped fields after {} (max: {})",
543                    MAX_FORM_FIELDS,
544                    MAX_FORM_FIELDS
545                );
546            }
547            if decode_failures > 0 {
548                crate::log_warn!(
549                    "Form field decode failed: dropped {} field(s). Check for invalid percent-encoding (e.g., %ZZ) or values exceeding {} bytes after decoding",
550                    decode_failures,
551                    MAX_URL_DECODED_LEN
552                );
553            }
554        }
555
556        map
557    }
558
559    fn parse_query(&self) -> HashMap<String, Vec<String>> {
560        let mut map: HashMap<String, Vec<String>> = HashMap::new();
561        let mut dropped_count = 0u32;
562
563        if let Some(query_start) = self.path.find('?') {
564            let query = &self.path[query_start + 1..];
565            for pair in query.split('&') {
566                if let Some((key, value)) = pair.split_once('=') {
567                    // URL decode and store (supports multiple values per key)
568                    match (url_decode(key), url_decode(value)) {
569                        (Ok(decoded_key), Ok(decoded_value)) => {
570                            map.entry(decoded_key).or_default().push(decoded_value);
571                        },
572                        _ => {
573                            dropped_count += 1;
574                        },
575                    }
576                } else if !pair.is_empty() {
577                    // Key without value (e.g., "?flag")
578                    match url_decode(pair) {
579                        Ok(decoded_key) => {
580                            map.entry(decoded_key).or_default().push(String::new());
581                        },
582                        Err(_) => {
583                            dropped_count += 1;
584                        },
585                    }
586                }
587            }
588        }
589
590        if dropped_count > 0 {
591            crate::log_warn!(
592                "Query param decode failed: dropped {} param(s). Check for invalid percent-encoding (e.g., %ZZ) or values exceeding {} bytes after decoding",
593                dropped_count,
594                MAX_URL_DECODED_LEN
595            );
596        }
597
598        map
599    }
600}
601
602#[cfg(test)]
603mod tests;