Skip to main content

netscape_cookie_file_parser/
parser.rs

1use std::io::BufRead;
2
3use crate::cookie::cookie_prefix;
4use crate::{Cookie, ParseError, ParseErrorKind};
5
/// Streaming parser over a Netscape cookie file.
///
/// Iterating yields one item per cookie record. A malformed record is yielded
/// as an error and iteration may continue afterwards; an I/O error ends the
/// stream because the reader can no longer be trusted.
#[derive(Debug)]
pub struct NetscapeCookieParser<R> {
    /// Buffered source the raw lines are read from.
    reader: R,
    /// Scratch buffer holding the bytes of the line currently being parsed;
    /// cleared and reused for every line.
    line: Vec<u8>,
    /// Count of lines read so far, i.e. the 1-based number of the latest line.
    line_number: usize,
    /// Set once the input is exhausted or an I/O error has been reported.
    done: bool,
}
17
18impl<R: BufRead> NetscapeCookieParser<R> {
19    /// Creates a parser from any buffered reader.
20    pub fn new(reader: R) -> Self {
21        Self {
22            reader,
23            line: Vec::new(),
24            line_number: 0,
25            done: false,
26        }
27    }
28}
29
30pub(crate) fn parse_line_inner(line: &[u8]) -> Result<Option<Cookie>, ParseErrorKind> {
31    // Work directly with bytes so non-UTF-8 cookie jars are not rejected before
32    // the field-level validation runs.
33    let line = trim_line_end(line);
34    if line.iter().all(u8::is_ascii_whitespace) {
35        return Ok(None);
36    }
37
38    // curl writes HttpOnly cookies as comment-looking lines prefixed with
39    // "#HttpOnly_". Treat the prefix as cookie metadata, then parse the rest as
40    // a normal Netscape cookie record.
41    let (line, http_only) = if let Some(rest) = line.strip_prefix(b"#HttpOnly_") {
42        (rest, true)
43    } else if line.starts_with(b"#") {
44        return Ok(None);
45    } else {
46        (line, false)
47    };
48
49    // "#HttpOnly_# ..." is still a comment after removing the HttpOnly marker.
50    if line.starts_with(b"#") {
51        return Ok(None);
52    }
53
54    let fields = fields(line);
55    if fields.len() != 7 {
56        return Err(ParseErrorKind::MissingFields {
57            found: fields.len(),
58        });
59    }
60
61    let name = fields[5].to_vec();
62    let value = fields[6].to_vec();
63
64    // curl rejects control octets in name/value while allowing high-bit bytes.
65    if has_invalid_octets(&name) || has_invalid_octets(&value) {
66        return Err(ParseErrorKind::InvalidOctets);
67    }
68
69    Ok(Some(Cookie {
70        domain: domain(fields[0]),
71        tail_match: fields[1].eq_ignore_ascii_case(b"TRUE"),
72        path: fields[2].to_vec(),
73        secure: fields[3].eq_ignore_ascii_case(b"TRUE"),
74        expires: parse_expires(fields[4])?,
75        prefix: cookie_prefix(&name),
76        name,
77        value,
78        http_only,
79    }))
80}
81
/// Removes a single trailing line terminator from `line`.
///
/// At most one `\n` is stripped, followed by at most one `\r`, so LF, CRLF
/// and a lone trailing CR are all accepted. Any other bytes are left alone.
fn trim_line_end(line: &[u8]) -> &[u8] {
    let without_lf = line.strip_suffix(b"\n").unwrap_or(line);
    without_lf.strip_suffix(b"\r").unwrap_or(without_lf)
}
94
95fn fields(line: &[u8]) -> Vec<&[u8]> {
96    let mut fields: Vec<&[u8]> = line.split(|byte| *byte == b'\t').collect();
97
98    // Some old curl jars omitted the path field. In those lines the third field
99    // is a prefix of TRUE/FALSE, so insert the default path before continuing.
100    if fields.len() >= 4 && is_legacy_path_bool(fields[2]) {
101        fields.insert(2, b"/");
102    }
103
104    // A missing trailing value is accepted as an empty value.
105    if fields.len() == 6 {
106        fields.push(b"");
107    }
108
109    fields
110}
111
/// Returns an owned copy of `domain` with one leading `.` removed, if present.
///
/// Only a single dot is dropped; any further leading dots are kept.
fn domain(domain: &[u8]) -> Vec<u8> {
    match domain {
        [b'.', rest @ ..] => rest.to_vec(),
        whole => whole.to_vec(),
    }
}
115
/// Reports whether `value` is a prefix of the literal `TRUE` or `FALSE`.
///
/// The comparison is case-sensitive, and truncated forms such as `T` or `FA`
/// match, as does the empty slice, which is a prefix of both keywords.
fn is_legacy_path_bool(value: &[u8]) -> bool {
    [&b"TRUE"[..], &b"FALSE"[..]]
        .iter()
        .any(|keyword| keyword.starts_with(value))
}
119
120fn parse_expires(value: &[u8]) -> Result<u64, ParseErrorKind> {
121    if value.is_empty() {
122        return Err(ParseErrorKind::InvalidExpires);
123    }
124
125    // Parse manually so overflow is reported as a cookie parse error instead of
126    // depending on string conversion or UTF-8 validation.
127    let mut number = 0u64;
128    for byte in value {
129        if !byte.is_ascii_digit() {
130            return Err(ParseErrorKind::InvalidExpires);
131        }
132
133        number = number
134            .checked_mul(10)
135            .and_then(|number| number.checked_add(u64::from(byte - b'0')))
136            .ok_or(ParseErrorKind::InvalidExpires)?;
137    }
138
139    Ok(number)
140}
141
/// Reports whether `value` contains an ASCII control octet.
///
/// That is any byte below 0x20, or 0x7f (DEL). Bytes with the high bit set
/// are not considered invalid.
fn has_invalid_octets(value: &[u8]) -> bool {
    value.iter().any(u8::is_ascii_control)
}
145
146impl<R: BufRead> Iterator for NetscapeCookieParser<R> {
147    type Item = Result<Cookie, ParseError>;
148
149    fn next(&mut self) -> Option<Self::Item> {
150        if self.done {
151            return None;
152        }
153
154        loop {
155            self.line.clear();
156
157            match self.reader.read_until(b'\n', &mut self.line) {
158                Ok(0) => {
159                    self.done = true;
160                    return None;
161                }
162                Ok(_) => {
163                    self.line_number += 1;
164
165                    match parse_line_inner(&self.line) {
166                        Ok(Some(cookie)) => return Some(Ok(cookie)),
167                        Ok(None) => continue,
168                        Err(kind) => {
169                            return Some(Err(ParseError {
170                                line: self.line_number,
171                                kind,
172                            }));
173                        }
174                    }
175                }
176                Err(error) => {
177                    self.done = true;
178                    // read_until failed before producing a complete next line,
179                    // so report the line number that was being attempted.
180                    return Some(Err(ParseError {
181                        line: self.line_number + 1,
182                        kind: ParseErrorKind::Io(error),
183                    }));
184                }
185            }
186        }
187    }
188}