ri_cookie_header_string/lib.rs
1//! A library for parsing HTTP Cookie header strings into structured cookie objects.
2//!
3//! This crate provides extension traits for parsing cookie header strings into structured cookie objects.
4//! It supports multiple cookie implementations including the [`cookie`] crate and optionally [`reqwest`].
5//!
//! **Note**: This is a **non-standard, security-focused parser**. Unlike the `cookie` crate's `SplitCookies`
//! iterator, which follows RFC 6265, this library uses heuristics to parse unquoted cookie values that may
//! contain semicolons. This is useful for handling cookie values that aren't properly quoted or encoded by
//! non-standard cookie implementations, providing additional safety when parsing untrusted cookie headers.
10//!
11//! # Features
12//!
13//! - **Advanced semicolon handling**: Distinguishes between semicolons that are cookie separators
14//! and semicolons that appear within unquoted cookie values
15//! - **Iterator-based parsing**: Lazy evaluation returns an iterator over parsed cookies
//! - **Error handling**: Returns `Result<Cookie, ParseError>` for each cookie, allowing
//! graceful handling of malformed entries (segments without `=` or with an empty name are skipped)
18//! - **Percent-encoding support**: Enable the `percent-encode` feature to decode percent-encoded
19//! cookie values (e.g., `%20` for space)
20//! - **Multiple cookie implementations**: Support for `cookie` crate and optionally `reqwest` via feature flag
21//!
22//! # When to Use This Library
23//!
24//! Use this library when:
25//! - Parsing non-standard cookie headers with unquoted values containing semicolons
26//! - You need safety when handling untrusted cookie input with unusual formatting
27//! - Your application requires advanced heuristics to detect cookie boundaries
28//! - You work with different cookie implementations across your project
29//!
//! **Note**: For standard RFC 6265-compliant cookie parsing, the `cookie` crate itself
//! provides `SplitCookies`, which is more performant and spec-compliant.
32//!
33//! # Usage
34//!
35//! Add this to your `Cargo.toml`:
36//!
37//! ```toml
38//! [dependencies]
39//! ri-cookie-header-string = "0.2"
40//! cookie = "0.18"
41//! ```
42//!
43//! It's recommended to enable the `percent-encode` feature:
44//!
45//! ```toml
46//! [dependencies]
47//! ri-cookie-header-string = { version = "0.2", features = ["percent-encode"] }
48//! cookie = "0.18"
49//! ```
50//!
51//! For reqwest support, enable the `reqwest` feature:
52//!
53//! ```toml
54//! [dependencies]
55//! ri-cookie-header-string = { version = "0.2", features = ["reqwest"] }
56//! reqwest = { version = "0.12", features = ["cookies"] }
57//! ```
58//!
59//! # Examples
60//!
61//! Basic usage with `cookie` crate:
62//!
63//! ```
64//! use ri_cookie_header_string::CookieHeaderStringExt;
65//! use cookie::Cookie;
66//!
67//! let cookie_header = "name=value; name2=value2; name3=value3";
68//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
69//! .filter_map(|result| result.ok())
70//! .collect();
71//!
72//! assert_eq!(cookies.len(), 3);
73//! ```
74//!
75//! Handling semicolons in unquoted cookie values:
76//!
77//! ```
78//! use ri_cookie_header_string::CookieHeaderStringExt;
79//! use cookie::Cookie;
80//!
81//! // Semicolon inside unquoted value is preserved correctly
82//! let cookie_header = "session=abc;123; other=value";
83//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
84//! .filter_map(|result| result.ok())
85//! .collect();
86//!
87//! assert_eq!(cookies.len(), 2);
88//! assert_eq!(cookies[0].value(), "abc;123");
89//! assert_eq!(cookies[1].value(), "value");
90//! ```
91
92use cookie::{Cookie, ParseError};
93use std::borrow::Cow;
94
/// Trait abstracting cookie construction across different cookie implementations.
///
/// [`HeaderStringCookies`] is generic over this trait: for every `name=value` pair it
/// extracts from a header string it calls one of the constructors below to produce the
/// concrete cookie value (e.g. `cookie::Cookie`).
pub trait CookieBuilder: Sized {
    /// Create a new cookie with the given name and value.
    ///
    /// The parser passes both strings with surrounding whitespace already trimmed.
    fn new(name: String, value: String) -> Self;

    /// Create a cookie from a percent-encoded `name=value` string.
    ///
    /// This is only called when the `percent-encode` feature is enabled
    /// and the cookie value contains `%` characters.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseError`] when the implementation cannot parse or decode `cookie_str`.
    #[cfg(feature = "percent-encode")]
    fn parse_encoded(cookie_str: String) -> Result<Self, ParseError>;
}
110
111/// Iterator over cookies in a header string.
112///
113/// This iterator provides advanced parsing for non-standard cookie headers with unquoted
114/// values that may contain semicolons. It's not strictly RFC 6265 compliant but handles
115/// real-world edge cases in cookie parsing.
116///
117/// Based on the `cookie` crate's `SplitCookies` iterator with enhanced heuristics.
118pub struct HeaderStringCookies<'c, C: CookieBuilder> {
119 // The source string, which we split and parse.
120 string: Cow<'c, str>,
121 // The index where we last split off.
122 last: usize,
123 // Phantom data to hold the cookie builder type
124 _phantom: std::marker::PhantomData<C>,
125}
126
/// Reports whether `b` may begin a cookie name: an ASCII letter, an ASCII digit, or `_`.
///
/// The parser consults this while deciding whether a semicolon terminates the
/// current cookie or belongs to its value.
#[inline(always)]
fn is_cookie_name_start(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}
135
136impl<'c, C: CookieBuilder> Iterator for HeaderStringCookies<'c, C> {
137 type Item = Result<C, ParseError>;
138
139 fn next(&mut self) -> Option<Self::Item> {
140 let s = self.string.as_ref();
141 let len = s.len();
142
143 while self.last < len {
144 let i = self.last;
145
146 let j = s[i..].find(';').map(|k| i + k).unwrap_or(len);
147
148 // Check if this semicolon is actually a separator or part of value
149 let end_pos = if j < len {
150 // Look ahead to determine if semicolon is separator
151 let after = &s[j + 1..];
152 let trimmed = after.trim_start();
153
154 // Semicolon is separator if:
155 // 1. Followed by whitespace/semicolon only, OR
156 // 2. Followed by a valid cookie name (starts with alnum/underscore) and then '='
157 if trimmed.is_empty() || trimmed.starts_with(';') {
158 j // Separator
159 } else if let Some(first) = trimmed.as_bytes().first().copied() {
160 if is_cookie_name_start(first) {
161 // Check if followed by '=' (indicating new cookie)
162 if let Some(eq_pos) = trimmed.find('=') {
163 let name_part = &trimmed[..eq_pos].trim();
164 // Valid cookie name before '=' means this is a new cookie
165 if !name_part.is_empty()
166 && name_part.chars().all(|c| {
167 let b = c as u8;
168 matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
169 })
170 {
171 j // Separator - new cookie starts here
172 } else {
173 // Not a valid cookie, semicolon is part of value - find next real separator
174 self.find_real_separator(j)
175 }
176 } else {
177 // No '=' found, semicolon is part of value
178 self.find_real_separator(j)
179 }
180 } else {
181 // Doesn't start with valid cookie char, semicolon is part of value
182 self.find_real_separator(j)
183 }
184 } else {
185 j // End of string
186 }
187 } else {
188 j // No semicolon found, end of string
189 };
190
191 self.last = end_pos + 1;
192
193 let cookie_str = s[i..end_pos].trim();
194
195 // Skip empty cookies
196 if cookie_str.is_empty() {
197 continue;
198 }
199
200 // Find '=' separator
201 let eq_pos = match cookie_str.find('=') {
202 Some(p) => p,
203 None => continue,
204 };
205
206 let name = cookie_str[..eq_pos].trim();
207 let val = cookie_str[eq_pos + 1..].trim();
208
209 if name.is_empty() {
210 continue;
211 }
212
213 // Create cookie - using owned strings for compatibility across implementations
214 let cookie_result = if val.contains('%') {
215 #[cfg(feature = "percent-encode")]
216 {
217 // Build the cookie string for percent-decoding
218 let mut cookie_str_buf = String::with_capacity(name.len() + val.len() + 1);
219 cookie_str_buf.push_str(name);
220 cookie_str_buf.push('=');
221 cookie_str_buf.push_str(val);
222 C::parse_encoded(cookie_str_buf)
223 }
224 #[cfg(not(feature = "percent-encode"))]
225 {
226 // Without percent-encode feature, treat % as literal character
227 Ok(C::new(name.to_string(), val.to_string()))
228 }
229 } else {
230 Ok(C::new(name.to_string(), val.to_string()))
231 };
232
233 return Some(cookie_result);
234 }
235
236 None
237 }
238}
239
240impl<'c, C: CookieBuilder> HeaderStringCookies<'c, C> {
241 /// Find the real cookie separator when a semicolon appears within an unquoted value.
242 ///
243 /// This method uses heuristics to determine if a semicolon is a cookie separator
244 /// (indicating the start of a new cookie) or part of the current cookie's value.
245 /// It looks ahead for patterns that indicate a new cookie boundary.
246 #[inline]
247 fn find_real_separator(&self, start: usize) -> usize {
248 let s = self.string.as_ref();
249 let bytes = s.as_bytes();
250 let len = s.len();
251 let mut i = start + 1;
252
253 // Skip whitespace
254 while i < len && bytes[i].is_ascii_whitespace() {
255 i += 1;
256 }
257
258 // Look for next semicolon that's a real separator
259 while i < len {
260 if bytes[i] == b';' {
261 let mut j = i + 1;
262 while j < len && bytes[j].is_ascii_whitespace() {
263 j += 1;
264 }
265
266 if j >= len || bytes[j] == b';' {
267 return i; // Real separator
268 }
269
270 // Check if followed by new cookie
271 if j < len && is_cookie_name_start(bytes[j]) {
272 let mut k = j;
273 while k < len && matches!(bytes[k], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') {
274 k += 1;
275 }
276 if k < len && bytes[k] == b'=' {
277 return i; // Real separator - new cookie found
278 }
279 }
280 }
281 i += 1;
282 }
283
284 len // No separator found, end of string
285 }
286}
287
/// Extension trait that adds header-string parsing to a cookie type.
///
/// `C` is the cookie type yielded by the returned iterator; it is built through
/// [`CookieBuilder`]. `'c` is the lifetime of the header string being parsed.
pub trait CookieHeaderStringExt<'c, C: CookieBuilder> {
    /// Creates an iterator over the cookies found in `string`.
    ///
    /// `string` may be anything convertible into a `Cow<'c, str>`. The returned
    /// [`HeaderStringCookies`] yields one `Result<C, ParseError>` per cookie.
    fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c, C>
    where
        S: Into<Cow<'c, str>>;
}
293
/// Implementation of [`CookieBuilder`] for `cookie::Cookie`.
///
/// Both methods forward to the `Cookie` constructor of the same name. The inputs are
/// owned `String`s, which is what lets the resulting cookies be `Cookie<'static>`.
impl CookieBuilder for Cookie<'static> {
    /// Builds a cookie from an already separated name and value.
    fn new(name: String, value: String) -> Self {
        // This path resolves to the inherent `Cookie::new` (inherent associated
        // functions take precedence over trait ones), so the call does not recurse.
        Cookie::new(name, value)
    }

    /// Parses a percent-encoded `name=value` string by delegating to the inherent
    /// `Cookie::parse_encoded`.
    #[cfg(feature = "percent-encode")]
    fn parse_encoded(cookie_str: String) -> Result<Self, ParseError> {
        Cookie::parse_encoded(cookie_str)
    }
}
305
306impl<'c> CookieHeaderStringExt<'c, Cookie<'static>> for Cookie<'c> {
307 #[inline(always)]
308 fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c, Cookie<'static>>
309 where
310 S: Into<Cow<'c, str>>,
311 {
312 HeaderStringCookies {
313 string: string.into(),
314 last: 0,
315 _phantom: std::marker::PhantomData,
316 }
317 }
318}
319
/// Optional support for reqwest integration when `reqwest` feature is enabled.
#[cfg(feature = "reqwest")]
pub mod reqwest_support {
    use super::*;

    /// Parse a cookie header string into cookies that can be handed to reqwest.
    ///
    /// The cookies produced are plain `cookie::Cookie` values: `reqwest::cookie::Cookie`
    /// is a read-only wrapper, so callers work with the underlying `cookie::Cookie`
    /// type and serialize it when feeding a reqwest cookie jar. The returned iterator
    /// is the same one `Cookie::header_string_parse` creates.
    ///
    /// # Example
    ///
    /// ```text
    /// use ri_cookie_header_string::reqwest_support::parse_for_reqwest;
    /// use url::Url;
    ///
    /// let cookie_header = "session=abc123; user=john";
    /// let cookies: Vec<_> = parse_for_reqwest(cookie_header)
    ///     .filter_map(|result| result.ok())
    ///     .collect();
    ///
    /// // Use with reqwest cookie jar
    /// let jar = reqwest::cookie::Jar::default();
    /// let url: Url = "https://example.com".parse().unwrap();
    /// for cookie in cookies {
    ///     // Cookies can be serialized and added to jar
    ///     jar.add_cookie_str(&cookie.to_string(), &url);
    /// }
    /// ```
    pub fn parse_for_reqwest<'c, S>(string: S) -> HeaderStringCookies<'c, Cookie<'static>>
    where
        S: Into<Cow<'c, str>>,
    {
        // Reuse the trait constructor rather than repeating the struct literal.
        <Cookie<'c> as CookieHeaderStringExt<'c, Cookie<'static>>>::header_string_parse(string)
    }
}
361
#[cfg(test)]
mod tests {
    use super::*;

    // Table-driven smoke test: empty input, separators only, and semicolons inside values.
    #[test]
    fn header_string_parse() {
        let cases = [
            ("", vec![]),
            (";;", vec![]),
            ("name=val;ue", vec![("name", "val;ue")]),
            ("name=val;ue;hello=world", vec![("name", "val;ue"), ("hello", "world")]),
        ];

        for (string, expected) in cases {
            let cookies: Vec<_> = Cookie::header_string_parse(string).filter_map(|parse| parse.ok()).collect();

            let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();

            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn header_string_parse_empty_values() {
        let cookie_header = "name=; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "");
        assert_eq!(cookies[1].value(), "value");
    }

    #[test]
    fn header_string_parse_whitespace_handling() {
        let cookie_header = " name = value ; other = val ";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name_value(), ("name", "value"));
        assert_eq!(cookies[1].name_value(), ("other", "val"));
    }

    #[test]
    fn header_string_parse_multiple_consecutive_semicolons() {
        let cookie_header = "name=;;;value;;;other=val";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();

        // Empty segments are skipped, and so is `;value`: it contains no `=`, so it is
        // not a cookie.
        assert_eq!(actual, vec![("name", ""), ("other", "val")]);
    }

    #[test]
    fn header_string_parse_special_characters() {
        let cookie_header = "session=!@#$%^&*(){}[]; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "!@#$%^&*(){}[]");
    }

    #[test]
    fn header_string_parse_value_with_equals() {
        let cookie_header = "session=abc=123; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc=123");
    }

    #[test]
    fn header_string_parse_long_values() {
        let long_value = "x".repeat(1000);
        let cookie_header = format!("name={long_value}; other=val");
        let cookies: Vec<_> = Cookie::header_string_parse(&cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value().len(), 1000);
    }

    #[test]
    fn header_string_parse_complex_semicolons() {
        let cookie_header = "session=abc;def;ghi; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc;def;ghi");
        assert_eq!(cookies[1].value(), "value");
    }

    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded() {
        let cookie_header = "name=val%20ue";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 1);
        assert_eq!(cookies[0].name_value(), ("name", "val ue"));
    }

    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded_semicolon() {
        let cookie_header = "name=val%3B123; other=value";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "val;123");
    }

    #[test]
    fn header_string_parse_numeric_names() {
        let cookie_header = "123=value; _456=other";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "123");
    }

    #[test]
    fn header_string_parse_hyphenated_names() {
        let cookie_header = "session-id=value; other-val=data";
        let cookies: Vec<_> = Cookie::header_string_parse(cookie_header).filter_map(|parse| parse.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "session-id");
    }

    #[test]
    #[cfg(feature = "reqwest")]
    fn header_string_parse_reqwest() {
        use crate::reqwest_support::parse_for_reqwest;

        let cookie_header = "session=abc;123; other=value";
        let cookies: Vec<_> = parse_for_reqwest(cookie_header).filter_map(|result| result.ok()).collect();

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc;123");
        assert_eq!(cookies[1].value(), "value");
    }
}