ri_cookie_header_string/
lib.rs

1//! A library for parsing HTTP Cookie header strings into structured cookie objects.
2//!
3//! This crate provides an extension trait for the [`cookie`] crate that enables advanced parsing
4//! of cookie header strings (as received in HTTP `Cookie` headers) into a collection of
5//! [`Cookie`] objects.
6//!
//! **Note**: This is a **non-standard, security-focused parser**. Unlike the `cookie` crate's standard
//! `SplitCookies` iterator, which follows RFC 6265 and splits on every semicolon, this library applies
//! heuristics so that unquoted cookie values may contain semicolons. This is useful for handling cookie values
//! that are not properly quoted or encoded by non-standard cookie implementations, and provides additional
//! safety when parsing untrusted cookie headers.
11//!
12//! # Features
13//!
14//! - **Advanced semicolon handling**: Distinguishes between semicolons that are cookie separators
15//!   and semicolons that appear within unquoted cookie values
16//! - **Iterator-based parsing**: Lazy evaluation returns an iterator over parsed cookies
//! - **Error handling**: Yields `Result<Cookie, ParseError>` for each cookie; entries without
//!   an `=` or with an empty name are skipped rather than reported as errors
19//! - **Percent-encoding support**: Enable the `percent-encode` feature to decode percent-encoded
20//!   cookie values (e.g., `%20` for space)
21//!
22//! # When to Use This Library
23//!
24//! Use this library when:
25//! - Parsing non-standard cookie headers with unquoted values containing semicolons
26//! - You need safety when handling untrusted cookie input with unusual formatting
27//! - Your application requires advanced heuristics to detect cookie boundaries
28//!
29//! **Note**: For standard RFC 6265-compliant cookie parsing, the built-in `cookie` crate
30//! provides `SplitCookies` which is more performant and spec-compliant.
31//!
32//! # Usage
33//!
34//! Add this to your `Cargo.toml`:
35//!
36//! ```toml
37//! [dependencies]
38//! ri-cookie-header-string = "0.1"
39//! cookie = "0.18"
40//! ```
41//!
42//! It's recommended to enable the `percent-encode` feature:
43//!
44//! ```toml
45//! [dependencies]
46//! ri-cookie-header-string = { version = "0.1", features = ["percent-encode"] }
47//! cookie = "0.18"
48//! ```
49//!
50//! # Examples
51//!
52//! Basic usage:
53//!
54//! ```
55//! use ri_cookie_header_string::CookieHeaderStringExt;
56//! use cookie::Cookie;
57//!
58//! let cookie_header = "name=value; name2=value2; name3=value3";
59//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
60//!     .filter_map(|result| result.ok())
61//!     .collect();
62//!
63//! assert_eq!(cookies.len(), 3);
64//! ```
65//!
66//! Handling semicolons in unquoted cookie values:
67//!
68//! ```
69//! use ri_cookie_header_string::CookieHeaderStringExt;
70//! use cookie::Cookie;
71//!
72//! // Semicolon inside unquoted value is preserved correctly
73//! let cookie_header = "session=abc;123; other=value";
74//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
75//!     .filter_map(|result| result.ok())
76//!     .collect();
77//!
78//! assert_eq!(cookies.len(), 2);
79//! assert_eq!(cookies[0].value(), "abc;123");
80//! assert_eq!(cookies[1].value(), "value");
81//! ```
82
83use cookie::{Cookie, ParseError};
84use std::borrow::Cow;
85
/// Iterator over the cookies in a `Cookie` header string.
///
/// This iterator provides advanced parsing for non-standard cookie headers with unquoted
/// values that may contain semicolons. It's not strictly RFC 6265 compliant but handles
/// real-world edge cases in cookie parsing.
///
/// Based on the `cookie` crate's `SplitCookies` iterator with enhanced heuristics.
///
/// The type is `Debug` so it can appear in diagnostics, and `Clone` so a partially
/// consumed iterator can be duplicated and resumed independently.
#[derive(Debug, Clone)]
pub struct HeaderStringCookies<'c> {
    // The source string, which we split and parse.
    string: Cow<'c, str>,
    // Byte offset into `string` at which the next cookie scan starts.
    last: usize,
}
99
/// Reports whether `b` may begin a cookie name: an ASCII letter, an ASCII digit, or `_`.
///
/// The boundary heuristics call this to decide whether the text following a semicolon
/// looks like the start of a new `name=value` pair or is still part of the current value.
#[inline(always)]
fn is_cookie_name_start(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}
108
109impl<'c> Iterator for HeaderStringCookies<'c> {
110    type Item = Result<Cookie<'c>, ParseError>;
111
112    fn next(&mut self) -> Option<Self::Item> {
113        let s = self.string.as_ref();
114        let len = s.len();
115
116        while self.last < len {
117            let i = self.last;
118
119            let j = s[i..].find(';').map(|k| i + k).unwrap_or(len);
120
121            // Check if this semicolon is actually a separator or part of value
122            let end_pos = if j < len {
123                // Look ahead to determine if semicolon is separator
124                let after = &s[j + 1..];
125                let trimmed = after.trim_start();
126
127                // Semicolon is separator if:
128                // 1. Followed by whitespace/semicolon only, OR
129                // 2. Followed by a valid cookie name (starts with alnum/underscore) and then '='
130                if trimmed.is_empty() || trimmed.starts_with(';') {
131                    j // Separator
132                } else if let Some(first) = trimmed.as_bytes().first().copied() {
133                    if is_cookie_name_start(first) {
134                        // Check if followed by '=' (indicating new cookie)
135                        if let Some(eq_pos) = trimmed.find('=') {
136                            let name_part = &trimmed[..eq_pos].trim();
137                            // Valid cookie name before '=' means this is a new cookie
138                            if !name_part.is_empty()
139                                && name_part.chars().all(|c| {
140                                    let b = c as u8;
141                                    matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
142                                })
143                            {
144                                j // Separator - new cookie starts here
145                            } else {
146                                // Not a valid cookie, semicolon is part of value - find next real separator
147                                self.find_real_separator(j)
148                            }
149                        } else {
150                            // No '=' found, semicolon is part of value
151                            self.find_real_separator(j)
152                        }
153                    } else {
154                        // Doesn't start with valid cookie char, semicolon is part of value
155                        self.find_real_separator(j)
156                    }
157                } else {
158                    j // End of string
159                }
160            } else {
161                j // No semicolon found, end of string
162            };
163
164            self.last = end_pos + 1;
165
166            let cookie_str = s[i..end_pos].trim();
167
168            // Skip empty cookies
169            if cookie_str.is_empty() {
170                continue;
171            }
172
173            // Find '=' separator
174            let eq_pos = match cookie_str.find('=') {
175                Some(p) => p,
176                None => continue,
177            };
178
179            let name = cookie_str[..eq_pos].trim();
180            let val = cookie_str[eq_pos + 1..].trim();
181
182            if name.is_empty() {
183                continue;
184            }
185
186            // Create cookie - using Cow with owned strings to maintain lifetime
187            let cookie_result = if val.contains('%') {
188                #[cfg(feature = "percent-encode")]
189                {
190                    // Build the cookie string for percent-decoding
191                    let mut cookie_str_buf = String::with_capacity(name.len() + val.len() + 1);
192                    cookie_str_buf.push_str(name);
193                    cookie_str_buf.push('=');
194                    cookie_str_buf.push_str(val);
195                    Cookie::parse_encoded(cookie_str_buf)
196                }
197                #[cfg(not(feature = "percent-encode"))]
198                {
199                    // Without percent-encode feature, treat % as literal character
200                    Ok(Cookie::new(name.to_string(), val.to_string()))
201                }
202            } else {
203                Ok(Cookie::new(name.to_string(), val.to_string()))
204            };
205
206            return Some(cookie_result);
207        }
208
209        None
210    }
211}
212
213impl<'c> HeaderStringCookies<'c> {
214    /// Find the real cookie separator when a semicolon appears within an unquoted value.
215    ///
216    /// This method uses heuristics to determine if a semicolon is a cookie separator
217    /// (indicating the start of a new cookie) or part of the current cookie's value.
218    /// It looks ahead for patterns that indicate a new cookie boundary.
219    #[inline]
220    fn find_real_separator(&self, start: usize) -> usize {
221        let s = self.string.as_ref();
222        let bytes = s.as_bytes();
223        let len = s.len();
224        let mut i = start + 1;
225
226        // Skip whitespace
227        while i < len && bytes[i].is_ascii_whitespace() {
228            i += 1;
229        }
230
231        // Look for next semicolon that's a real separator
232        while i < len {
233            if bytes[i] == b';' {
234                let mut j = i + 1;
235                while j < len && bytes[j].is_ascii_whitespace() {
236                    j += 1;
237                }
238
239                if j >= len || bytes[j] == b';' {
240                    return i; // Real separator
241                }
242
243                // Check if followed by new cookie
244                if j < len && is_cookie_name_start(bytes[j]) {
245                    let mut k = j;
246                    while k < len && matches!(bytes[k], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') {
247                        k += 1;
248                    }
249                    if k < len && bytes[k] == b'=' {
250                        return i; // Real separator - new cookie found
251                    }
252                }
253            }
254            i += 1;
255        }
256
257        len // No separator found, end of string
258    }
259}
260
261pub trait CookieHeaderStringExt<'c> {
262    fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c>
263    where
264        S: Into<Cow<'c, str>>;
265}
266
267impl<'c> CookieHeaderStringExt<'c> for Cookie<'c> {
268    #[inline(always)]
269    fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c>
270    where
271        S: Into<Cow<'c, str>>,
272    {
273        HeaderStringCookies {
274            string: string.into(),
275            last: 0,
276        }
277    }
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `header` and keeps only the cookies that parsed successfully.
    fn parse_ok(header: &str) -> Vec<Cookie<'_>> {
        Cookie::header_string_parse(header)
            .filter_map(Result::ok)
            .collect()
    }

    #[test]
    fn header_string_parse() {
        let cases = [
            ("", vec![]),
            (";;", vec![]),
            ("name=val;ue", vec![("name", "val;ue")]),
            ("name=val;ue;hello=world", vec![("name", "val;ue"), ("hello", "world")]),
        ];

        for (string, expected) in cases {
            let cookies = parse_ok(string);
            let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();

            assert_eq!(expected, actual, "input: {string:?}");
        }
    }

    #[test]
    fn header_string_parse_empty_values() {
        let cookies = parse_ok("name=; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "");
        assert_eq!(cookies[1].value(), "value");
    }

    #[test]
    fn header_string_parse_whitespace_handling() {
        let cookies = parse_ok("  name  =  value  ;  other  =  val  ");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name_value(), ("name", "value"));
        assert_eq!(cookies[1].name_value(), ("other", "val"));
    }

    #[test]
    fn header_string_parse_multiple_consecutive_semicolons() {
        let cookies = parse_ok("name=;;;value;;;other=val");
        let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();

        // Runs of semicolons produce empty entries and `value` has no '=',
        // so only the two well-formed pairs survive.
        assert_eq!(actual, vec![("name", ""), ("other", "val")]);
    }

    #[test]
    fn header_string_parse_special_characters() {
        let cookies = parse_ok("session=!@#$%^&*(){}[]; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "!@#$%^&*(){}[]");
    }

    #[test]
    fn header_string_parse_value_with_equals() {
        let cookies = parse_ok("session=abc=123; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc=123");
    }

    #[test]
    fn header_string_parse_long_values() {
        let long_value = "x".repeat(1000);
        let cookie_header = format!("name={long_value}; other=val");
        let cookies = parse_ok(&cookie_header);

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value().len(), 1000);
    }

    #[test]
    fn header_string_parse_complex_semicolons() {
        let cookies = parse_ok("session=abc;def;ghi; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc;def;ghi");
        assert_eq!(cookies[1].value(), "value");
    }

    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded() {
        let cookies = parse_ok("name=val%20ue");

        assert_eq!(cookies.len(), 1);
        assert_eq!(cookies[0].name_value(), ("name", "val ue"));
    }

    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded_semicolon() {
        let cookies = parse_ok("name=val%3B123; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "val;123");
    }

    #[test]
    fn header_string_parse_numeric_names() {
        let cookies = parse_ok("123=value; _456=other");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "123");
        assert_eq!(cookies[1].name(), "_456");
    }

    #[test]
    fn header_string_parse_hyphenated_names() {
        let cookies = parse_ok("session-id=value; other-val=data");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "session-id");
        assert_eq!(cookies[1].name(), "other-val");
    }
}