netscape-cookie-file-parser 0.2.0

Parse Netscape/curl cookie jar files while preserving raw cookie bytes.
Documentation
use std::io::BufRead;

use crate::cookie::cookie_prefix;
use crate::{Cookie, ParseError, ParseErrorKind};

/// Streaming parser over a Netscape cookie file.
///
/// The iterator yields cookies one at a time. Malformed cookie lines are yielded
/// as errors, but the iterator can continue afterward; I/O errors end the
/// stream because the reader is no longer reliable.
///
/// Blank lines and comment lines are skipped silently and never surface as
/// items.
pub struct NetscapeCookieParser<R> {
    /// Underlying buffered source the cookie file is read from.
    reader: R,
    /// Scratch buffer holding the raw bytes of the line currently being
    /// parsed; it is cleared and refilled for every line so the allocation is
    /// reused.
    line: Vec<u8>,
    /// Number of lines successfully read so far. Starts at 0 and is bumped
    /// before a line is parsed, so reported line numbers are 1-based.
    line_number: usize,
    /// Set once end of input or an I/O error has been observed; afterwards the
    /// iterator only returns `None`.
    done: bool,
}

impl<R: BufRead> NetscapeCookieParser<R> {
    /// Creates a parser from any buffered reader.
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            line: Vec::new(),
            line_number: 0,
            done: false,
        }
    }
}

/// Parses one raw line of a Netscape cookie file.
///
/// The line is handled as bytes throughout, so cookie jars that are not valid
/// UTF-8 still reach the field-level validation.
///
/// Returns `Ok(None)` for lines that carry no cookie (blank lines and
/// comments), `Ok(Some(cookie))` for a well-formed record, and an error kind
/// when the record has the wrong number of fields, contains control octets in
/// its name or value, or has an unparsable expiry.
pub(crate) fn parse_line_inner(line: &[u8]) -> Result<Option<Cookie>, ParseErrorKind> {
    let record = trim_line_end(line);
    if record.iter().all(|byte| byte.is_ascii_whitespace()) {
        return Ok(None);
    }

    // curl marks HttpOnly cookies with a comment-looking "#HttpOnly_" prefix.
    // Peel it off as metadata; whatever follows is an ordinary record.
    let (record, http_only) = match record.strip_prefix(b"#HttpOnly_") {
        Some(rest) => (rest, true),
        None => (record, false),
    };

    // One check covers both plain comments and "#HttpOnly_# ..." lines, which
    // are still comments once the marker has been removed.
    if record.starts_with(b"#") {
        return Ok(None);
    }

    let columns = fields(record);
    let (domain, tail_match, path, secure, expires, name, value) = match columns.as_slice() {
        &[domain, tail_match, path, secure, expires, name, value] => {
            (domain, tail_match, path, secure, expires, name, value)
        }
        other => {
            return Err(ParseErrorKind::MissingFields { found: other.len() });
        }
    };

    // curl rejects control octets in name/value while allowing high-bit bytes.
    // This check runs before the expiry is parsed, so a line with both
    // problems reports the octet error.
    if has_invalid_octets(name) || has_invalid_octets(value) {
        return Err(ParseErrorKind::InvalidOctets);
    }

    let expires = parse_expires(expires)?;
    let name = name.to_vec();

    Ok(Some(Cookie {
        // A single leading dot on the domain is dropped.
        domain: domain.strip_prefix(b".").unwrap_or(domain).to_vec(),
        tail_match: tail_match.eq_ignore_ascii_case(b"TRUE"),
        path: sanitize_path(path),
        secure: secure.eq_ignore_ascii_case(b"TRUE"),
        expires,
        prefix: cookie_prefix(&name),
        name,
        value: value.to_vec(),
        http_only,
    }))
}

/// Removes a single trailing line terminator from `line`.
///
/// Recognizes `\r\n`, a lone `\n`, and a lone `\r`. At most one `\n` and then
/// at most one `\r` is removed; any other bytes are left untouched.
fn trim_line_end(line: &[u8]) -> &[u8] {
    match line {
        // The CRLF arm must come first so both bytes are removed together.
        [body @ .., b'\r', b'\n'] | [body @ .., b'\n'] | [body @ .., b'\r'] => body,
        _ => line,
    }
}

/// Splits a cookie record on tab characters and nudges it toward the
/// seven-column layout.
///
/// Two repairs are applied, in this order:
/// 1. If there are at least four columns and the third one looks like a
///    boolean, a default path of `/` is inserted in front of it.
/// 2. If exactly six columns remain, an empty value column is appended.
///
/// The returned vector may still have a length other than seven; the caller
/// is responsible for rejecting such records.
fn fields(line: &[u8]) -> Vec<&[u8]> {
    let mut columns: Vec<&[u8]> = Vec::new();
    columns.extend(line.split(|&byte| byte == b'\t'));

    // Some old curl jars omitted the path field. In those lines the third
    // column is a prefix of TRUE/FALSE rather than a path.
    let path_omitted = columns.len() >= 4 && is_legacy_path_bool(columns[2]);
    if path_omitted {
        columns.insert(2, &b"/"[..]);
    }

    // A record that stops after the name is accepted with an empty value.
    if columns.len() == 6 {
        columns.push(&b""[..]);
    }

    columns
}

/// Reports whether `value` is a prefix of the literal `TRUE` or `FALSE`.
///
/// The comparison is case-sensitive and accepts partial spellings such as
/// `T` or `FAL`. An empty `value` is a prefix of both keywords and therefore
/// also matches.
fn is_legacy_path_bool(value: &[u8]) -> bool {
    let keywords: [&[u8]; 2] = [b"TRUE", b"FALSE"];
    keywords.iter().any(|keyword| keyword.starts_with(value))
}

/// Normalizes the path column of a cookie record into an owned byte vector.
///
/// Steps:
/// - A leading `"` is removed, and if one was present a trailing `"` is
///   removed as well. A trailing quote without a leading one is kept.
/// - Anything that does not then start with `/` (including the empty path)
///   becomes `/`.
/// - A path longer than one byte loses a single trailing `/`.
fn sanitize_path(path: &[u8]) -> Vec<u8> {
    // curl strips one surrounding quote pair when it validates paths.
    let unquoted = match path.strip_prefix(b"\"") {
        Some(rest) => rest.strip_suffix(b"\"").unwrap_or(rest),
        None => path,
    };

    // Relative or empty paths are not meaningful in the Netscape file format.
    // An empty slice never starts with "/", so this covers both cases.
    if !unquoted.starts_with(b"/") {
        return vec![b'/'];
    }

    // Drop one trailing slash, but never reduce the root path to nothing.
    let normalized = match unquoted {
        [head @ .., b'/'] if !head.is_empty() => head,
        _ => unquoted,
    };

    normalized.to_vec()
}

/// Parses the expiry column as an unsigned decimal integer.
///
/// The input must be a non-empty run of ASCII digits; signs, whitespace, and
/// any other byte are rejected.
///
/// # Errors
///
/// Returns `ParseErrorKind::InvalidExpires` when the input is empty, contains
/// a non-digit byte, or does not fit in a `u64`.
fn parse_expires(value: &[u8]) -> Result<u64, ParseErrorKind> {
    if value.is_empty() {
        return Err(ParseErrorKind::InvalidExpires);
    }

    // Digits are accumulated by hand so the input never has to be valid UTF-8
    // and overflow surfaces as an ordinary parse error.
    value
        .iter()
        .try_fold(0u64, |total, &digit| {
            if !digit.is_ascii_digit() {
                return None;
            }

            total
                .checked_mul(10)?
                .checked_add(u64::from(digit - b'0'))
        })
        .ok_or(ParseErrorKind::InvalidExpires)
}

/// Reports whether `value` contains an ASCII control octet.
///
/// Control octets are the bytes `0x00..=0x1F` and `0x7F`. Bytes with the high
/// bit set are not considered invalid.
fn has_invalid_octets(value: &[u8]) -> bool {
    value.iter().any(u8::is_ascii_control)
}

impl<R: BufRead> Iterator for NetscapeCookieParser<R> {
    type Item = Result<Cookie, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }

        loop {
            self.line.clear();

            match self.reader.read_until(b'\n', &mut self.line) {
                Ok(0) => {
                    self.done = true;
                    return None;
                }
                Ok(_) => {
                    self.line_number += 1;

                    match parse_line_inner(&self.line) {
                        Ok(Some(cookie)) => return Some(Ok(cookie)),
                        Ok(None) => continue,
                        Err(kind) => {
                            return Some(Err(ParseError {
                                line: self.line_number,
                                kind,
                            }));
                        }
                    }
                }
                Err(error) => {
                    self.done = true;
                    // read_until failed before producing a complete next line,
                    // so report the line number that was being attempted.
                    return Some(Err(ParseError {
                        line: self.line_number + 1,
                        kind: ParseErrorKind::Io(error),
                    }));
                }
            }
        }
    }
}