ri_cookie_header_string/
lib.rs

1//! A library for parsing HTTP Cookie header strings into structured cookie objects.
2//!
3//! This crate provides extension traits for parsing cookie header strings into structured cookie objects.
4//! It supports multiple cookie implementations including the [`cookie`] crate and optionally [`reqwest`].
5//!
//! **Note**: This is a **non-standard, security-focused parser**. Unlike the `cookie` crate's `SplitCookies`
//! iterator, it does not follow RFC 6265 strictly: it applies heuristics so that semicolons inside unquoted
//! cookie values are kept as part of the value. This is useful for handling cookie values that aren't properly
//! quoted or encoded by non-standard cookie implementations, providing additional safety when parsing
//! untrusted cookie headers.
10//!
11//! # Features
12//!
13//! - **Advanced semicolon handling**: Distinguishes between semicolons that are cookie separators
14//!   and semicolons that appear within unquoted cookie values
15//! - **Iterator-based parsing**: Lazy evaluation returns an iterator over parsed cookies
16//! - **Error handling**: Returns `Result<Cookie, ParseError>` for each cookie, allowing
17//!   graceful handling of malformed entries
18//! - **Percent-encoding support**: Enable the `percent-encode` feature to decode percent-encoded
19//!   cookie values (e.g., `%20` for space)
20//! - **Multiple cookie implementations**: Support for `cookie` crate and optionally `reqwest` via feature flag
21//!
22//! # When to Use This Library
23//!
24//! Use this library when:
25//! - Parsing non-standard cookie headers with unquoted values containing semicolons
26//! - You need safety when handling untrusted cookie input with unusual formatting
27//! - Your application requires advanced heuristics to detect cookie boundaries
28//! - You work with different cookie implementations across your project
29//!
//! **Note**: For standard RFC 6265-compliant cookie parsing, prefer the `cookie` crate's own
//! `Cookie::split_parse`, which returns a `SplitCookies` iterator and is more performant and spec-compliant.
32//!
33//! # Usage
34//!
35//! Add this to your `Cargo.toml`:
36//!
37//! ```toml
38//! [dependencies]
39//! ri-cookie-header-string = "0.2"
40//! cookie = "0.18"
41//! ```
42//!
43//! It's recommended to enable the `percent-encode` feature:
44//!
45//! ```toml
46//! [dependencies]
47//! ri-cookie-header-string = { version = "0.2", features = ["percent-encode"] }
48//! cookie = "0.18"
49//! ```
50//!
51//! For reqwest support, enable the `reqwest` feature:
52//!
53//! ```toml
54//! [dependencies]
55//! ri-cookie-header-string = { version = "0.2", features = ["reqwest"] }
56//! reqwest = { version = "0.12", features = ["cookies"] }
57//! ```
58//!
59//! # Examples
60//!
61//! Basic usage with `cookie` crate:
62//!
63//! ```
64//! use ri_cookie_header_string::CookieHeaderStringExt;
65//! use cookie::Cookie;
66//!
67//! let cookie_header = "name=value; name2=value2; name3=value3";
68//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
69//!     .filter_map(|result| result.ok())
70//!     .collect();
71//!
72//! assert_eq!(cookies.len(), 3);
73//! ```
74//!
75//! Handling semicolons in unquoted cookie values:
76//!
77//! ```
78//! use ri_cookie_header_string::CookieHeaderStringExt;
79//! use cookie::Cookie;
80//!
81//! // Semicolon inside unquoted value is preserved correctly
82//! let cookie_header = "session=abc;123; other=value";
83//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
84//!     .filter_map(|result| result.ok())
85//!     .collect();
86//!
87//! assert_eq!(cookies.len(), 2);
88//! assert_eq!(cookies[0].value(), "abc;123");
89//! assert_eq!(cookies[1].value(), "value");
90//! ```
91
92use cookie::{Cookie, ParseError};
93use std::borrow::Cow;
94
/// Abstraction over cookie construction so the parser can produce different cookie types.
///
/// The trait is public because it appears as a bound on [`HeaderStringCookies`] and
/// [`CookieHeaderStringExt`]. In this file the only implementor is `cookie::Cookie<'static>`.
///
/// The parser hands implementors owned, already-trimmed strings, so an implementation
/// does not need to borrow from the header being parsed.
pub trait CookieBuilder: Sized {
    /// Create a new cookie with the given name and value.
    ///
    /// Called for every cookie whose value is used verbatim (no percent-decoding).
    fn new(name: String, value: String) -> Self;

    /// Create a cookie from a percent-encoded string.
    ///
    /// This is only called when the `percent-encode` feature is enabled
    /// and the cookie value contains `%` characters. `cookie_str` is the
    /// re-assembled `name=value` pair; the implementation is responsible for
    /// decoding it and reports failures through the returned [`ParseError`].
    #[cfg(feature = "percent-encode")]
    fn parse_encoded(cookie_str: String) -> Result<Self, ParseError>;
}
110
111/// Iterator over cookies in a header string.
112///
113/// This iterator provides advanced parsing for non-standard cookie headers with unquoted
114/// values that may contain semicolons. It's not strictly RFC 6265 compliant but handles
115/// real-world edge cases in cookie parsing.
116///
117/// Based on the `cookie` crate's `SplitCookies` iterator with enhanced heuristics.
118pub struct HeaderStringCookies<'c, C: CookieBuilder> {
119    // The source string, which we split and parse.
120    string: Cow<'c, str>,
121    // The index where we last split off.
122    last: usize,
123    // Phantom data to hold the cookie builder type
124    _phantom: std::marker::PhantomData<C>,
125}
126
/// Returns `true` when `b` may begin a cookie name: an ASCII letter, an ASCII digit,
/// or an underscore.
///
/// The boundary heuristics call this on the first byte after a semicolon to decide
/// whether that semicolon could be introducing a new cookie rather than belonging
/// to the current value.
#[inline(always)]
fn is_cookie_name_start(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}
135
136impl<'c, C: CookieBuilder> Iterator for HeaderStringCookies<'c, C> {
137    type Item = Result<C, ParseError>;
138
139    fn next(&mut self) -> Option<Self::Item> {
140        let s = self.string.as_ref();
141        let len = s.len();
142
143        while self.last < len {
144            let i = self.last;
145
146            let j = s[i..].find(';').map(|k| i + k).unwrap_or(len);
147
148            // Check if this semicolon is actually a separator or part of value
149            let end_pos = if j < len {
150                // Look ahead to determine if semicolon is separator
151                let after = &s[j + 1..];
152                let trimmed = after.trim_start();
153
154                // Semicolon is separator if:
155                // 1. Followed by whitespace/semicolon only, OR
156                // 2. Followed by a valid cookie name (starts with alnum/underscore) and then '='
157                if trimmed.is_empty() || trimmed.starts_with(';') {
158                    j // Separator
159                } else if let Some(first) = trimmed.as_bytes().first().copied() {
160                    if is_cookie_name_start(first) {
161                        // Check if followed by '=' (indicating new cookie)
162                        if let Some(eq_pos) = trimmed.find('=') {
163                            let name_part = &trimmed[..eq_pos].trim();
164                            // Valid cookie name before '=' means this is a new cookie
165                            if !name_part.is_empty()
166                                && name_part.chars().all(|c| {
167                                    let b = c as u8;
168                                    matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
169                                })
170                            {
171                                j // Separator - new cookie starts here
172                            } else {
173                                // Not a valid cookie, semicolon is part of value - find next real separator
174                                self.find_real_separator(j)
175                            }
176                        } else {
177                            // No '=' found, semicolon is part of value
178                            self.find_real_separator(j)
179                        }
180                    } else {
181                        // Doesn't start with valid cookie char, semicolon is part of value
182                        self.find_real_separator(j)
183                    }
184                } else {
185                    j // End of string
186                }
187            } else {
188                j // No semicolon found, end of string
189            };
190
191            self.last = end_pos + 1;
192
193            let cookie_str = s[i..end_pos].trim();
194
195            // Skip empty cookies
196            if cookie_str.is_empty() {
197                continue;
198            }
199
200            // Find '=' separator
201            let eq_pos = match cookie_str.find('=') {
202                Some(p) => p,
203                None => continue,
204            };
205
206            let name = cookie_str[..eq_pos].trim();
207            let val = cookie_str[eq_pos + 1..].trim();
208
209            if name.is_empty() {
210                continue;
211            }
212
213            // Create cookie - using owned strings for compatibility across implementations
214            let cookie_result = if val.contains('%') {
215                #[cfg(feature = "percent-encode")]
216                {
217                    // Build the cookie string for percent-decoding
218                    let mut cookie_str_buf = String::with_capacity(name.len() + val.len() + 1);
219                    cookie_str_buf.push_str(name);
220                    cookie_str_buf.push('=');
221                    cookie_str_buf.push_str(val);
222                    C::parse_encoded(cookie_str_buf)
223                }
224                #[cfg(not(feature = "percent-encode"))]
225                {
226                    // Without percent-encode feature, treat % as literal character
227                    Ok(C::new(name.to_string(), val.to_string()))
228                }
229            } else {
230                Ok(C::new(name.to_string(), val.to_string()))
231            };
232
233            return Some(cookie_result);
234        }
235
236        None
237    }
238}
239
240impl<'c, C: CookieBuilder> HeaderStringCookies<'c, C> {
241    /// Find the real cookie separator when a semicolon appears within an unquoted value.
242    ///
243    /// This method uses heuristics to determine if a semicolon is a cookie separator
244    /// (indicating the start of a new cookie) or part of the current cookie's value.
245    /// It looks ahead for patterns that indicate a new cookie boundary.
246    #[inline]
247    fn find_real_separator(&self, start: usize) -> usize {
248        let s = self.string.as_ref();
249        let bytes = s.as_bytes();
250        let len = s.len();
251        let mut i = start + 1;
252
253        // Skip whitespace
254        while i < len && bytes[i].is_ascii_whitespace() {
255            i += 1;
256        }
257
258        // Look for next semicolon that's a real separator
259        while i < len {
260            if bytes[i] == b';' {
261                let mut j = i + 1;
262                while j < len && bytes[j].is_ascii_whitespace() {
263                    j += 1;
264                }
265
266                if j >= len || bytes[j] == b';' {
267                    return i; // Real separator
268                }
269
270                // Check if followed by new cookie
271                if j < len && is_cookie_name_start(bytes[j]) {
272                    let mut k = j;
273                    while k < len && matches!(bytes[k], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') {
274                        k += 1;
275                    }
276                    if k < len && bytes[k] == b'=' {
277                        return i; // Real separator - new cookie found
278                    }
279                }
280            }
281            i += 1;
282        }
283
284        len // No separator found, end of string
285    }
286}
287
/// Extension trait that adds header-string parsing to a cookie type.
///
/// `'c` is the lifetime of the (possibly borrowed) header string and `C` is the
/// cookie type produced by the returned iterator.
pub trait CookieHeaderStringExt<'c, C: CookieBuilder> {
    /// Creates a lazy iterator over the cookies contained in `string`.
    ///
    /// `string` may be anything convertible into `Cow<'c, str>`, e.g. a `&str` or
    /// an owned `String`. No parsing happens until the iterator is advanced; each
    /// item is a `Result<C, ParseError>`.
    fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c, C>
    where
        S: Into<Cow<'c, str>>;
}
293
/// Implementation of CookieBuilder for `cookie::Cookie`
///
/// Cookies are built from owned strings, so the result is `Cookie<'static>` and
/// does not borrow from the header that was parsed.
impl CookieBuilder for Cookie<'static> {
    /// Builds a cookie from an owned name and value by forwarding to `Cookie::new`.
    fn new(name: String, value: String) -> Self {
        Cookie::new(name, value)
    }

    /// Forwards the `name=value` string to `Cookie::parse_encoded`, returning its result.
    #[cfg(feature = "percent-encode")]
    fn parse_encoded(cookie_str: String) -> Result<Self, ParseError> {
        Cookie::parse_encoded(cookie_str)
    }
}
305
/// Enables `Cookie::header_string_parse(...)`.
///
/// The header may be borrowed for `'c`, but the yielded cookies are
/// `Cookie<'static>` because the parser builds them from owned strings.
impl<'c> CookieHeaderStringExt<'c, Cookie<'static>> for Cookie<'c> {
    /// Wraps `string` in a [`HeaderStringCookies`] iterator positioned at the
    /// start of the header; parsing happens lazily as the iterator is advanced.
    #[inline(always)]
    fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c, Cookie<'static>>
    where
        S: Into<Cow<'c, str>>,
    {
        HeaderStringCookies {
            string: string.into(),
            // Start scanning at the first byte of the header.
            last: 0,
            _phantom: std::marker::PhantomData,
        }
    }
}
319
/// Optional support for reqwest integration when `reqwest` feature is enabled.
#[cfg(feature = "reqwest")]
pub mod reqwest_support {
    use super::*;

    /// Parse a cookie header string into cookies compatible with reqwest.
    ///
    /// This function parses HTTP Cookie header strings into `cookie::Cookie` objects
    /// that can be used with reqwest. Since `reqwest::cookie::Cookie` is a read-only
    /// wrapper, we work with the underlying `cookie::Cookie` type.
    ///
    /// The returned iterator is constructed exactly like the one returned by
    /// `Cookie::header_string_parse`: it is lazy and yields
    /// `Result<Cookie<'static>, ParseError>` items.
    ///
    /// # Example
    ///
    /// ```text
    /// use ri_cookie_header_string::reqwest_support::parse_for_reqwest;
    /// use url::Url;
    ///
    /// let cookie_header = "session=abc123; user=john";
    /// let cookies: Vec<_> = parse_for_reqwest(cookie_header)
    ///     .filter_map(|result| result.ok())
    ///     .collect();
    ///
    /// // Use with reqwest cookie jar
    /// let jar = reqwest::cookie::Jar::default();
    /// let url: Url = "https://example.com".parse().unwrap();
    /// for cookie in cookies {
    ///     // Cookies can be serialized and added to jar
    ///     jar.add_cookie_str(&cookie.to_string(), &url);
    /// }
    /// ```
    pub fn parse_for_reqwest<'c, S>(string: S) -> HeaderStringCookies<'c, Cookie<'static>>
    where
        S: Into<Cow<'c, str>>,
    {
        HeaderStringCookies {
            string: string.into(),
            // Start scanning at the first byte of the header.
            last: 0,
            _phantom: std::marker::PhantomData,
        }
    }
}
361
// Unit tests for the header-string parser. Every test collects only the `Ok`
// items, so parse errors are dropped rather than asserted on.
#[cfg(test)]
mod tests {
    use super::*;

    // Table-driven check of the basic cases: empty input, separators only, and
    // a semicolon embedded in an unquoted value (with and without a following cookie).
    #[test]
    fn header_string_parse() {
        let cases = [
            ("", vec![]),
            (";;", vec![]),
            ("name=val;ue", vec![("name", "val;ue")]),
            ("name=val;ue;hello=world", vec![("name", "val;ue"), ("hello", "world")]),
        ];

        for (string, expected) in cases {
            let cookies: Vec<_> = Cookie::header_string_parse(string).filter_map(|parse| parse.ok()).collect();

            let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();

            assert_eq!(expected, actual);
        }
    }

    // A cookie with an empty value is still yielded.
    #[test]
    fn header_string_parse_empty_values() {
        let cookie_header = "name=; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "");
        assert_eq!(cookies[1].value(), "value");
    }

    // Whitespace around names, values, '=' and ';' is trimmed.
    #[test]
    fn header_string_parse_whitespace_handling() {
        let cookie_header = "  name  =  value  ;  other  =  val  ";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name_value(), ("name", "value"));
        assert_eq!(cookies[1].name_value(), ("other", "val"));
    }

    // Runs of semicolons must not prevent parsing.
    // NOTE(review): this only asserts that at least one cookie is produced; it does
    // not pin which cookies come out - consider asserting the exact result.
    #[test]
    fn header_string_parse_multiple_consecutive_semicolons() {
        let cookie_header = "name=;;;value;;;other=val";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        // Multiple semicolons create empty entries which are skipped
        assert!(!cookies.is_empty());
    }

    // Punctuation in a value (including a '%' that is not a valid escape) is preserved.
    #[test]
    fn header_string_parse_special_characters() {
        let cookie_header = "session=!@#$%^&*(){}[]; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "!@#$%^&*(){}[]");
    }

    // Only the first '=' splits name from value; later ones belong to the value.
    #[test]
    fn header_string_parse_value_with_equals() {
        let cookie_header = "session=abc=123; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc=123");
    }

    // A 1000-byte value is returned in full.
    #[test]
    fn header_string_parse_long_values() {
        let long_value = "x".repeat(1000);
        let cookie_header = format!("name={long_value}; other=val");
        let cookies: Vec<_> = Cookie::header_string_parse(&cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value().len(), 1000);
    }

    // Several embedded semicolons in one value, followed by a real separator.
    #[test]
    fn header_string_parse_complex_semicolons() {
        let cookie_header = "session=abc;def;ghi; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc;def;ghi");
        assert_eq!(cookies[1].value(), "value");
    }

    // With the `percent-encode` feature, `%20` decodes to a space.
    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded() {
        let cookie_header = "name=val%20ue";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 1);
        assert_eq!(cookies[0].name_value(), ("name", "val ue"));
    }

    // With the `percent-encode` feature, an encoded semicolon (`%3B`) decodes into the value.
    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded_semicolon() {
        let cookie_header = "name=val%3B123; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "val;123");
    }

    // Names may start with a digit or an underscore.
    #[test]
    fn header_string_parse_numeric_names() {
        let cookie_header = "123=value; _456=other";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "123");
    }

    // Hyphens are allowed inside names.
    #[test]
    fn header_string_parse_hyphenated_names() {
        let cookie_header = "session-id=value; other-val=data";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "session-id");
    }

    // The reqwest entry point applies the same embedded-semicolon handling.
    #[test]
    #[cfg(feature = "reqwest")]
    fn header_string_parse_reqwest() {
        use crate::reqwest_support::parse_for_reqwest;

        let cookie_header = "session=abc;123; other=value";
        let cookies: Vec<_> = parse_for_reqwest(cookie_header).filter_map(|result| result.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc;123");
        assert_eq!(cookies[1].value(), "value");
    }
}