Skip to main content

netscape_cookie_file_parser/
parser.rs

1use std::io::BufRead;
2
3use crate::cookie::cookie_prefix;
4use crate::{Cookie, ParseError, ParseErrorKind};
5
/// Streaming parser over a Netscape cookie file.
///
/// The iterator yields cookies one at a time. Malformed cookie lines are yielded
/// as errors, but the iterator can continue afterward; I/O errors end the
/// stream because the reader is no longer reliable.
pub struct NetscapeCookieParser<R> {
    /// Underlying byte source; the iterator reads it one `\n`-terminated
    /// chunk at a time.
    reader: R,
    /// Scratch buffer holding the raw bytes of the line currently being
    /// parsed. It is cleared and refilled on every read so the allocation is
    /// reused.
    line: Vec<u8>,
    /// Number of lines read so far. It is incremented before a line is
    /// parsed, so the first line of the input is reported as line 1.
    line_number: usize,
    /// Set once end of input or an I/O error has been observed; every later
    /// call to `next` returns `None`.
    done: bool,
}
17
18impl<R: BufRead> NetscapeCookieParser<R> {
19    /// Creates a parser from any buffered reader.
20    pub fn new(reader: R) -> Self {
21        Self {
22            reader,
23            line: Vec::new(),
24            line_number: 0,
25            done: false,
26        }
27    }
28}
29
30pub(crate) fn parse_line_inner(line: &[u8]) -> Result<Option<Cookie>, ParseErrorKind> {
31    // Work directly with bytes so non-UTF-8 cookie jars are not rejected before
32    // the field-level validation runs.
33    let line = trim_line_end(line);
34    if line.iter().all(u8::is_ascii_whitespace) {
35        return Ok(None);
36    }
37
38    // curl writes HttpOnly cookies as comment-looking lines prefixed with
39    // "#HttpOnly_". Treat the prefix as cookie metadata, then parse the rest as
40    // a normal Netscape cookie record.
41    let (line, http_only) = if let Some(rest) = line.strip_prefix(b"#HttpOnly_") {
42        (rest, true)
43    } else if line.starts_with(b"#") {
44        return Ok(None);
45    } else {
46        (line, false)
47    };
48
49    // "#HttpOnly_# ..." is still a comment after removing the HttpOnly marker.
50    if line.starts_with(b"#") {
51        return Ok(None);
52    }
53
54    let fields = fields(line);
55    if fields.len() != 7 {
56        return Err(ParseErrorKind::MissingFields {
57            found: fields.len(),
58        });
59    }
60
61    let name = fields[5].to_vec();
62    let value = fields[6].to_vec();
63
64    // curl rejects control octets in name/value while allowing high-bit bytes.
65    if has_invalid_octets(&name) || has_invalid_octets(&value) {
66        return Err(ParseErrorKind::InvalidOctets);
67    }
68
69    Ok(Some(Cookie {
70        domain: fields[0].strip_prefix(b".").unwrap_or(fields[0]).to_vec(),
71        tail_match: fields[1].eq_ignore_ascii_case(b"TRUE"),
72        path: sanitize_path(fields[2]),
73        secure: fields[3].eq_ignore_ascii_case(b"TRUE"),
74        expires: parse_expires(fields[4])?,
75        prefix: cookie_prefix(&name),
76        name,
77        value,
78        http_only,
79    }))
80}
81
/// Removes a single trailing line terminator from `line`.
///
/// Recognizes `\r\n`, a lone `\n`, and a lone `\r`; at most one terminator is
/// removed and everything else is returned unchanged.
fn trim_line_end(line: &[u8]) -> &[u8] {
    match line {
        // CRLF is matched first so both bytes go together.
        [body @ .., b'\r', b'\n'] => body,
        [body @ .., b'\n'] | [body @ .., b'\r'] => body,
        _ => line,
    }
}
94
95fn fields(line: &[u8]) -> Vec<&[u8]> {
96    let mut fields: Vec<&[u8]> = line.split(|byte| *byte == b'\t').collect();
97
98    // Some old curl jars omitted the path field. In those lines the third field
99    // is a prefix of TRUE/FALSE, so insert the default path before continuing.
100    if fields.len() >= 4 && is_legacy_path_bool(fields[2]) {
101        fields.insert(2, b"/");
102    }
103
104    // A missing trailing value is accepted as an empty value.
105    if fields.len() == 6 {
106        fields.push(b"");
107    }
108
109    fields
110}
111
/// Reports whether `value` is a (possibly empty) case-sensitive prefix of
/// `TRUE` or `FALSE`.
///
/// Used to spot records whose path column is missing, where a boolean flag
/// sits in the path position instead.
fn is_legacy_path_bool(value: &[u8]) -> bool {
    const KEYWORDS: [&[u8]; 2] = [b"TRUE", b"FALSE"];

    KEYWORDS.iter().any(|keyword| keyword.starts_with(value))
}
115
/// Normalizes the path column of a cookie record into an owned byte vector.
///
/// * A leading `"` is removed, together with a trailing `"` if one is present.
///   A trailing quote without a leading one is left in place.
/// * Anything that is empty or does not start with `/` becomes `/`.
/// * A path longer than `/` loses one trailing `/`.
fn sanitize_path(path: &[u8]) -> Vec<u8> {
    // curl strips one surrounding quote pair when it validates paths.
    let unquoted = match path {
        [b'"', inner @ .., b'"'] => inner,
        [b'"', rest @ ..] => rest,
        _ => path,
    };

    match unquoted {
        // Non-root absolute path ending in '/': drop that one slash. The guard
        // fails for a bare "/" because nothing is left in front of the slash.
        [kept @ .., b'/'] if kept.starts_with(b"/") => kept.to_vec(),
        // Any other absolute path, including the root, is kept as is.
        [b'/', ..] => unquoted.to_vec(),
        // Relative or empty paths are not meaningful in the Netscape format.
        _ => b"/".to_vec(),
    }
}
136
137fn parse_expires(value: &[u8]) -> Result<u64, ParseErrorKind> {
138    if value.is_empty() {
139        return Err(ParseErrorKind::InvalidExpires);
140    }
141
142    // Parse manually so overflow is reported as a cookie parse error instead of
143    // depending on string conversion or UTF-8 validation.
144    let mut number = 0u64;
145    for byte in value {
146        if !byte.is_ascii_digit() {
147            return Err(ParseErrorKind::InvalidExpires);
148        }
149
150        number = number
151            .checked_mul(10)
152            .and_then(|number| number.checked_add(u64::from(byte - b'0')))
153            .ok_or(ParseErrorKind::InvalidExpires)?;
154    }
155
156    Ok(number)
157}
158
/// Reports whether `value` contains a byte in `0x00..=0x1f` or the byte `0x7f`.
///
/// Bytes with the high bit set are not considered invalid.
fn has_invalid_octets(value: &[u8]) -> bool {
    value
        .iter()
        .any(|&octet| matches!(octet, 0x00..=0x1f | 0x7f))
}
162
163impl<R: BufRead> Iterator for NetscapeCookieParser<R> {
164    type Item = Result<Cookie, ParseError>;
165
166    fn next(&mut self) -> Option<Self::Item> {
167        if self.done {
168            return None;
169        }
170
171        loop {
172            self.line.clear();
173
174            match self.reader.read_until(b'\n', &mut self.line) {
175                Ok(0) => {
176                    self.done = true;
177                    return None;
178                }
179                Ok(_) => {
180                    self.line_number += 1;
181
182                    match parse_line_inner(&self.line) {
183                        Ok(Some(cookie)) => return Some(Ok(cookie)),
184                        Ok(None) => continue,
185                        Err(kind) => {
186                            return Some(Err(ParseError {
187                                line: self.line_number,
188                                kind,
189                            }));
190                        }
191                    }
192                }
193                Err(error) => {
194                    self.done = true;
195                    // read_until failed before producing a complete next line,
196                    // so report the line number that was being attempted.
197                    return Some(Err(ParseError {
198                        line: self.line_number + 1,
199                        kind: ParseErrorKind::Io(error),
200                    }));
201                }
202            }
203        }
204    }
205}