ri_cookie_header_string/lib.rs
1//! A library for parsing HTTP Cookie header strings into structured cookie objects.
2//!
3//! This crate provides an extension trait for the [`cookie`] crate that enables advanced parsing
4//! of cookie header strings (as received in HTTP `Cookie` headers) into a collection of
5//! [`Cookie`] objects.
6//!
//! **Note**: This is a **non-standard, security-focused parser**. Unlike the `cookie` crate's standard,
//! RFC 6265-oriented `SplitCookies` iterator, this library applies smarter parsing to unquoted cookie values
//! that may contain semicolons. This is useful for handling cookie values that were not properly quoted or
//! encoded by non-standard cookie implementations, and provides additional safety when parsing untrusted
//! cookie headers.
11//!
12//! # Features
13//!
14//! - **Advanced semicolon handling**: Distinguishes between semicolons that are cookie separators
15//! and semicolons that appear within unquoted cookie values
16//! - **Iterator-based parsing**: Lazy evaluation returns an iterator over parsed cookies
17//! - **Error handling**: Returns `Result<Cookie, ParseError>` for each cookie, allowing
18//! graceful handling of malformed entries
19//! - **Percent-encoding support**: Enable the `percent-encode` feature to decode percent-encoded
20//! cookie values (e.g., `%20` for space)
21//!
22//! # When to Use This Library
23//!
24//! Use this library when:
25//! - Parsing non-standard cookie headers with unquoted values containing semicolons
26//! - You need safety when handling untrusted cookie input with unusual formatting
27//! - Your application requires advanced heuristics to detect cookie boundaries
28//!
//! **Note**: For standard RFC 6265-compliant cookie parsing, prefer the `SplitCookies` iterator
//! provided by the `cookie` crate itself, which is more performant and spec-compliant.
31//!
32//! # Usage
33//!
34//! Add this to your `Cargo.toml`:
35//!
36//! ```toml
37//! [dependencies]
38//! ri-cookie-header-string = "0.1"
39//! cookie = "0.18"
40//! ```
41//!
42//! It's recommended to enable the `percent-encode` feature:
43//!
44//! ```toml
45//! [dependencies]
46//! ri-cookie-header-string = { version = "0.1", features = ["percent-encode"] }
47//! cookie = "0.18"
48//! ```
49//!
50//! # Examples
51//!
52//! Basic usage:
53//!
54//! ```
55//! use ri_cookie_header_string::CookieHeaderStringExt;
56//! use cookie::Cookie;
57//!
58//! let cookie_header = "name=value; name2=value2; name3=value3";
59//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
60//! .filter_map(|result| result.ok())
61//! .collect();
62//!
63//! assert_eq!(cookies.len(), 3);
64//! ```
65//!
66//! Handling semicolons in unquoted cookie values:
67//!
68//! ```
69//! use ri_cookie_header_string::CookieHeaderStringExt;
70//! use cookie::Cookie;
71//!
72//! // Semicolon inside unquoted value is preserved correctly
73//! let cookie_header = "session=abc;123; other=value";
74//! let cookies: Vec<_> = Cookie::header_string_parse(cookie_header)
75//! .filter_map(|result| result.ok())
76//! .collect();
77//!
78//! assert_eq!(cookies.len(), 2);
79//! assert_eq!(cookies[0].value(), "abc;123");
80//! assert_eq!(cookies[1].value(), "value");
81//! ```
82
83use cookie::{Cookie, ParseError};
84use std::borrow::Cow;
85
/// Iterator over the cookies in a `Cookie` header string.
///
/// This iterator provides advanced parsing for non-standard cookie headers with unquoted
/// values that may contain semicolons. It is not strictly RFC 6265 compliant but handles
/// real-world edge cases in cookie parsing.
///
/// Based on the `cookie` crate's `SplitCookies` iterator with enhanced heuristics.
///
/// The type is `Debug` and `Clone` so it can be logged and so that a partially consumed
/// iterator can be duplicated.
#[derive(Debug, Clone)]
pub struct HeaderStringCookies<'c> {
    // The source string, which we split and parse.
    string: Cow<'c, str>,
    // Byte offset into `string` at which the next scan starts (one past the last split point).
    last: usize,
}
99
/// Returns `true` when `b` may be the first byte of a cookie name, i.e. when it is an
/// ASCII letter, an ASCII digit, or an underscore.
///
/// The boundary heuristics use this to decide whether the text following a semicolon
/// looks like the beginning of a new cookie rather than a continuation of the value.
#[inline(always)]
fn is_cookie_name_start(b: u8) -> bool {
    b == b'_' || b.is_ascii_alphanumeric()
}
108
109impl<'c> Iterator for HeaderStringCookies<'c> {
110 type Item = Result<Cookie<'c>, ParseError>;
111
112 fn next(&mut self) -> Option<Self::Item> {
113 let s = self.string.as_ref();
114 let len = s.len();
115
116 while self.last < len {
117 let i = self.last;
118
119 let j = s[i..].find(';').map(|k| i + k).unwrap_or(len);
120
121 // Check if this semicolon is actually a separator or part of value
122 let end_pos = if j < len {
123 // Look ahead to determine if semicolon is separator
124 let after = &s[j + 1..];
125 let trimmed = after.trim_start();
126
127 // Semicolon is separator if:
128 // 1. Followed by whitespace/semicolon only, OR
129 // 2. Followed by a valid cookie name (starts with alnum/underscore) and then '='
130 if trimmed.is_empty() || trimmed.starts_with(';') {
131 j // Separator
132 } else if let Some(first) = trimmed.as_bytes().first().copied() {
133 if is_cookie_name_start(first) {
134 // Check if followed by '=' (indicating new cookie)
135 if let Some(eq_pos) = trimmed.find('=') {
136 let name_part = &trimmed[..eq_pos].trim();
137 // Valid cookie name before '=' means this is a new cookie
138 if !name_part.is_empty()
139 && name_part.chars().all(|c| {
140 let b = c as u8;
141 matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-')
142 })
143 {
144 j // Separator - new cookie starts here
145 } else {
146 // Not a valid cookie, semicolon is part of value - find next real separator
147 self.find_real_separator(j)
148 }
149 } else {
150 // No '=' found, semicolon is part of value
151 self.find_real_separator(j)
152 }
153 } else {
154 // Doesn't start with valid cookie char, semicolon is part of value
155 self.find_real_separator(j)
156 }
157 } else {
158 j // End of string
159 }
160 } else {
161 j // No semicolon found, end of string
162 };
163
164 self.last = end_pos + 1;
165
166 let cookie_str = s[i..end_pos].trim();
167
168 // Skip empty cookies
169 if cookie_str.is_empty() {
170 continue;
171 }
172
173 // Find '=' separator
174 let eq_pos = match cookie_str.find('=') {
175 Some(p) => p,
176 None => continue,
177 };
178
179 let name = cookie_str[..eq_pos].trim();
180 let val = cookie_str[eq_pos + 1..].trim();
181
182 if name.is_empty() {
183 continue;
184 }
185
186 // Create cookie - using Cow with owned strings to maintain lifetime
187 let cookie_result = if val.contains('%') {
188 #[cfg(feature = "percent-encode")]
189 {
190 // Build the cookie string for percent-decoding
191 let mut cookie_str_buf = String::with_capacity(name.len() + val.len() + 1);
192 cookie_str_buf.push_str(name);
193 cookie_str_buf.push('=');
194 cookie_str_buf.push_str(val);
195 Cookie::parse_encoded(cookie_str_buf)
196 }
197 #[cfg(not(feature = "percent-encode"))]
198 {
199 // Without percent-encode feature, treat % as literal character
200 Ok(Cookie::new(name.to_string(), val.to_string()))
201 }
202 } else {
203 Ok(Cookie::new(name.to_string(), val.to_string()))
204 };
205
206 return Some(cookie_result);
207 }
208
209 None
210 }
211}
212
213impl<'c> HeaderStringCookies<'c> {
214 /// Find the real cookie separator when a semicolon appears within an unquoted value.
215 ///
216 /// This method uses heuristics to determine if a semicolon is a cookie separator
217 /// (indicating the start of a new cookie) or part of the current cookie's value.
218 /// It looks ahead for patterns that indicate a new cookie boundary.
219 #[inline]
220 fn find_real_separator(&self, start: usize) -> usize {
221 let s = self.string.as_ref();
222 let bytes = s.as_bytes();
223 let len = s.len();
224 let mut i = start + 1;
225
226 // Skip whitespace
227 while i < len && bytes[i].is_ascii_whitespace() {
228 i += 1;
229 }
230
231 // Look for next semicolon that's a real separator
232 while i < len {
233 if bytes[i] == b';' {
234 let mut j = i + 1;
235 while j < len && bytes[j].is_ascii_whitespace() {
236 j += 1;
237 }
238
239 if j >= len || bytes[j] == b';' {
240 return i; // Real separator
241 }
242
243 // Check if followed by new cookie
244 if j < len && is_cookie_name_start(bytes[j]) {
245 let mut k = j;
246 while k < len && matches!(bytes[k], b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'-') {
247 k += 1;
248 }
249 if k < len && bytes[k] == b'=' {
250 return i; // Real separator - new cookie found
251 }
252 }
253 }
254 i += 1;
255 }
256
257 len // No separator found, end of string
258 }
259}
260
261pub trait CookieHeaderStringExt<'c> {
262 fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c>
263 where
264 S: Into<Cow<'c, str>>;
265}
266
267impl<'c> CookieHeaderStringExt<'c> for Cookie<'c> {
268 #[inline(always)]
269 fn header_string_parse<S>(string: S) -> HeaderStringCookies<'c>
270 where
271 S: Into<Cow<'c, str>>,
272 {
273 HeaderStringCookies {
274 string: string.into(),
275 last: 0,
276 }
277 }
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `header` and keeps only the cookies that parsed successfully.
    fn parse_ok(header: &str) -> Vec<Cookie<'_>> {
        Cookie::header_string_parse(header)
            .filter_map(Result::ok)
            .collect()
    }

    #[test]
    fn header_string_parse() {
        let cases = [
            ("", vec![]),
            (";;", vec![]),
            ("name=val;ue", vec![("name", "val;ue")]),
            ("name=val;ue;hello=world", vec![("name", "val;ue"), ("hello", "world")]),
        ];

        for (string, expected) in cases {
            let cookies = parse_ok(string);
            let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();

            assert_eq!(expected, actual);
        }
    }

    #[test]
    fn header_string_parse_empty_values() {
        let cookies = parse_ok("name=; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "");
        assert_eq!(cookies[1].value(), "value");
    }

    #[test]
    fn header_string_parse_whitespace_handling() {
        let cookies = parse_ok("  name  =  value  ;  other  =  val  ");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name_value(), ("name", "value"));
        assert_eq!(cookies[1].name_value(), ("other", "val"));
    }

    #[test]
    fn header_string_parse_multiple_consecutive_semicolons() {
        let cookies = parse_ok("name=;;;value;;;other=val");

        // Empty segments and the `=`-less `;value` segment are skipped, leaving exactly
        // the two well-formed pairs.
        let actual: Vec<_> = cookies.iter().map(|c| c.name_value()).collect();
        assert_eq!(actual, vec![("name", ""), ("other", "val")]);
    }

    #[test]
    fn header_string_parse_special_characters() {
        let cookies = parse_ok("session=!@#$%^&*(){}[]; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "!@#$%^&*(){}[]");
    }

    #[test]
    fn header_string_parse_value_with_equals() {
        let cookies = parse_ok("session=abc=123; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc=123");
    }

    #[test]
    fn header_string_parse_long_values() {
        let long_value = "x".repeat(1000);
        let cookie_header = format!("name={long_value}; other=val");
        let cookies = parse_ok(&cookie_header);

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value().len(), 1000);
    }

    #[test]
    fn header_string_parse_complex_semicolons() {
        let cookies = parse_ok("session=abc;def;ghi; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "abc;def;ghi");
        assert_eq!(cookies[1].value(), "value");
    }

    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded() {
        let cookies = parse_ok("name=val%20ue");

        assert_eq!(cookies.len(), 1);
        assert_eq!(cookies[0].name_value(), ("name", "val ue"));
    }

    #[test]
    #[cfg(feature = "percent-encode")]
    fn header_string_parse_percent_encoded_semicolon() {
        let cookies = parse_ok("name=val%3B123; other=value");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].value(), "val;123");
    }

    #[test]
    fn header_string_parse_numeric_names() {
        let cookies = parse_ok("123=value; _456=other");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "123");
    }

    #[test]
    fn header_string_parse_hyphenated_names() {
        let cookies = parse_ok("session-id=value; other-val=data");

        assert_eq!(cookies.len(), 2);
        assert_eq!(cookies[0].name(), "session-id");
    }
}