mik_sdk/request/
parsing.rs

//! URL decoding and parsing utilities.
//!
//! This module provides functions for URL decoding and case-insensitive string matching
//! used by the Request module for query string, form body, and header parsing.
5
6use crate::constants::MAX_URL_DECODED_LEN;
7
8/// Error returned when URL decoding fails.
9#[derive(Debug, Clone, Copy, PartialEq, Eq)]
10#[non_exhaustive]
11pub enum DecodeError {
12    /// Decoded output would exceed maximum length.
13    TooLong,
14}
15
16impl std::fmt::Display for DecodeError {
17    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
18        match self {
19            Self::TooLong => write!(
20                f,
21                "url decoded output exceeds maximum length ({}KB limit)",
22                MAX_URL_DECODED_LEN / 1024
23            ),
24        }
25    }
26}
27
28impl std::error::Error for DecodeError {}
29
30/// Case-insensitive ASCII substring check (no allocation).
31#[inline]
32pub(super) fn contains_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
33    // Edge case: empty needle is always contained, and windows(0) panics
34    if needle.is_empty() {
35        return true;
36    }
37    // Edge case: needle longer than haystack can never be contained
38    if needle.len() > haystack.len() {
39        return false;
40    }
41    haystack
42        .as_bytes()
43        .windows(needle.len())
44        .any(|w| w.eq_ignore_ascii_case(needle.as_bytes()))
45}
46
47/// Basic URL decoding (handles %XX sequences and + as space).
48///
49/// # Errors
50///
51/// Returns [`DecodeError::TooLong`] if decoded output would exceed
52/// `MAX_URL_DECODED_LEN` (64KB). This prevents memory exhaustion from
53/// maliciously crafted inputs.
54///
55/// # Examples
56///
57/// ```ignore
58/// use mik_sdk::url_decode;
59///
60/// assert_eq!(url_decode("hello%20world").unwrap(), "hello world");
61/// assert_eq!(url_decode("hello+world").unwrap(), "hello world");
62/// assert_eq!(url_decode("caf%C3%A9").unwrap(), "café");
63/// ```
64pub fn url_decode(s: &str) -> Result<String, DecodeError> {
65    let mut bytes = Vec::with_capacity(s.len());
66    let mut chars = s.bytes();
67
68    while let Some(b) = chars.next() {
69        // Defense-in-depth: limit decoded output size
70        if bytes.len() >= MAX_URL_DECODED_LEN {
71            return Err(DecodeError::TooLong);
72        }
73
74        match b {
75            b'%' => {
76                // Try to read two hex digits
77                let h1 = chars.next();
78                let h2 = chars.next();
79                if let (Some(h1), Some(h2)) = (h1, h2) {
80                    let hex_str = [h1, h2];
81                    if let Ok(hex_str) = std::str::from_utf8(&hex_str)
82                        && let Ok(decoded) = u8::from_str_radix(hex_str, 16)
83                    {
84                        bytes.push(decoded);
85                        continue;
86                    }
87                    // Invalid escape, keep original bytes
88                    bytes.push(b'%');
89                    bytes.push(h1);
90                    bytes.push(h2);
91                } else {
92                    // Not enough chars after %, keep as-is
93                    bytes.push(b'%');
94                    if let Some(h1) = h1 {
95                        bytes.push(h1);
96                    }
97                }
98            },
99            b'+' => bytes.push(b' '),
100            _ => bytes.push(b),
101        }
102    }
103
104    Ok(String::from_utf8_lossy(&bytes).into_owned())
105}
106
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that each `(input, expected)` pair decodes successfully to `expected`.
    fn assert_decodes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(url_decode(input).unwrap(), expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_url_decode() {
        assert_decodes(&[
            ("hello%20world", "hello world"),
            ("hello+world", "hello world"),
            ("a%2Fb", "a/b"),
            ("plain", "plain"),
        ]);
    }

    #[test]
    fn test_url_decode_utf8() {
        assert_decodes(&[("caf%C3%A9", "café"), ("%E4%B8%AD%E6%96%87", "中文")]);
    }

    #[test]
    fn test_url_decode_double_encoding() {
        // %2520 is a double-encoded space; only one level is decoded, leaving %20.
        assert_decodes(&[("%2520", "%20")]);
    }

    #[test]
    fn test_url_decode_unicode() {
        assert_decodes(&[
            // ✓ character
            ("%E2%9C%93", "✓"),
            // 日本語
            ("%E6%97%A5%E6%9C%AC%E8%AA%9E", "日本語"),
        ]);
    }

    #[test]
    fn test_url_decode_invalid_sequences() {
        assert_decodes(&[
            // Invalid hex
            ("%GG", "%GG"),
            // Incomplete sequence
            ("%2", "%2"),
            ("%", "%"),
            // Mixed valid/invalid
            ("a%20b%GGc%2", "a b%GGc%2"),
        ]);
    }

    #[test]
    fn test_url_decode_plus_sign() {
        assert_decodes(&[
            ("hello+world", "hello world"),
            ("a+b+c", "a b c"),
            ("+++", "   "),
        ]);
    }

    #[test]
    fn test_contains_ignore_ascii_case() {
        // (haystack, needle, expected result)
        let cases = [
            ("application/json", "json", true),
            ("APPLICATION/JSON", "json", true),
            ("Application/Json", "JSON", true),
            ("text/html", "json", false),
            ("", "", true),
            ("anything", "", true),
            ("", "something", false),
        ];
        for (haystack, needle, expected) in cases {
            assert_eq!(
                contains_ignore_ascii_case(haystack, needle),
                expected,
                "haystack: {haystack:?}, needle: {needle:?}"
            );
        }
    }
}