Skip to main content

mailrs_rfc5322/
header.rs

//! Header iteration types — emitted by [`Message::headers`](crate::Message::headers).

/// A single header field of a message, as yielded by [`HeaderIter`].
///
/// `name` and `value` are both views into the original message buffer, so
/// producing a `Header` never allocates. The value is the wire-format bytes
/// that follow the colon, minus at most one leading space or tab; folded
/// continuation lines (CRLF + WSP) stay inside the same slice.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Header<'a> {
    /// Field name with its original casing. RFC 5322 §3.6.8 makes field
    /// names case-insensitive, so callers comparing names should case-fold.
    pub name: &'a str,
    /// Raw field value as it appears after the colon. Only the single
    /// space/tab directly after the colon is removed; for folded headers the
    /// CRLF + WSP sequences are left in place and unfolding is the caller's
    /// decision.
    pub value: &'a [u8],
}

impl<'a> Header<'a> {
    /// Borrow the value as text when it is valid UTF-8.
    ///
    /// RFC 5322 §2.2 restricts fields to 7-bit ASCII and RFC 6532 widens
    /// that to UTF-8, so this succeeds for nearly every real message.
    /// Returns `None` when the bytes are not valid UTF-8.
    pub fn value_str(&self) -> Option<&'a str> {
        match std::str::from_utf8(self.value) {
            Ok(text) => Some(text),
            Err(_) => None,
        }
    }
}
32
33/// Iterator over all headers in a message, in the order they appear.
34///
35/// Returned by [`Message::headers`](crate::Message::headers). Stops at
36/// the empty line that separates headers from the body (or at EOF if
37/// the message has no body).
38pub struct HeaderIter<'a> {
39    pub(crate) bytes: &'a [u8],
40    pub(crate) cursor: usize,
41}
42
43impl<'a> Iterator for HeaderIter<'a> {
44    type Item = Header<'a>;
45
46    fn next(&mut self) -> Option<Header<'a>> {
47        let start = self.cursor;
48        if start >= self.bytes.len() {
49            return None;
50        }
51
52        // Find end of this logical header line (handles RFC 5322 §3.2.2
53        // line folding: continuation lines starting with WSP belong
54        // to this header).
55        let (line_end, after_crlf) = match find_unfolded_line_end(self.bytes, start) {
56            Some(pair) => pair,
57            None => {
58                // Trailing partial header at EOF — emit it.
59                self.cursor = self.bytes.len();
60                let line = &self.bytes[start..];
61                if line.is_empty() {
62                    return None;
63                }
64                return parse_header_line(self.bytes, start, self.bytes.len());
65            }
66        };
67
68        // Empty line (CRLF or LF only) marks the body boundary.
69        if line_end == start {
70            self.cursor = self.bytes.len(); // stop after this point
71            return None;
72        }
73
74        self.cursor = after_crlf;
75        parse_header_line(self.bytes, start, line_end)
76    }
77}
78
/// Find the end of the logical header line that starts at `start`.
///
/// Returns `Some((line_end, after_crlf))`:
/// - `line_end` is the offset of the terminating CRLF/LF, so
///   `&bytes[start..line_end]` is the line content without its terminator;
/// - `after_crlf` is the offset where the next line begins.
///
/// Folding is honoured: a CRLF/LF followed by SP or HTAB is a continuation,
/// not a terminator, and scanning carries on past it. The one exception is
/// an empty line at `start`. That is the header/body separator and is
/// returned as-is (`line_end == start`) even when the following line begins
/// with whitespace, because an empty line has nothing to continue.
///
/// Returns `None` if no terminator is found before the end of `bytes`.
pub(crate) fn find_unfolded_line_end(bytes: &[u8], start: usize) -> Option<(usize, usize)> {
    let mut scan_from = start;
    while scan_from < bytes.len() {
        // Locate the next LF; this covers both "\n" and "\r\n" endings.
        let lf = scan_from + bytes[scan_from..].iter().position(|&b| b == b'\n')?;

        // Exclude the '\r' of a CRLF pair from the content, but never look
        // at a byte before `start`.
        let content_end = if lf > start && bytes[lf - 1] == b'\r' {
            lf - 1
        } else {
            lf
        };
        let next = lf + 1;

        // Only the first physical line can be empty here: every later
        // physical line starts with the WSP that made it a continuation.
        let is_blank_line = content_end == start;
        let next_is_continuation = bytes
            .get(next)
            .map_or(false, |&b| b == b' ' || b == b'\t');

        if is_blank_line || !next_is_continuation {
            return Some((content_end, next));
        }

        // Folded: the LF just found is part of this logical line.
        scan_from = next;
    }
    None
}
117
118/// Parse one header line `bytes[start..line_end]` into a `Header`.
119///
120/// Returns `None` if the line lacks a colon (malformed) — RFC 5322 says
121/// such a line terminates the header block, but we already detected
122/// empty-line termination above; a malformed line here is just skipped.
123fn parse_header_line(bytes: &[u8], start: usize, line_end: usize) -> Option<Header<'_>> {
124    let line = &bytes[start..line_end];
125    let colon = line.iter().position(|&b| b == b':')?;
126    // Name is line[..colon]. RFC 5322 §3.6.8: name is printable ASCII
127    // excluding colon; we don't validate (real-world messages
128    // sometimes have spaces or other anomalies — let downstream decide).
129    let name = std::str::from_utf8(&line[..colon]).ok()?;
130    // Skip the colon + at most one optional WSP after it.
131    let mut value_start_local = colon + 1;
132    if value_start_local < line.len() && (line[value_start_local] == b' ' || line[value_start_local] == b'\t') {
133        value_start_local += 1;
134    }
135    Some(Header {
136        name,
137        value: &line[value_start_local..],
138    })
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare-LF endings: content stops at the LF, next line starts after it.
    #[test]
    fn unfolded_line_end_handles_lf() {
        let raw = b"Subject: hi\nFrom: x\n\nbody";
        let (line_end, next_line) = find_unfolded_line_end(raw, 0).unwrap();
        // "Subject: hi" is 11 bytes long.
        assert_eq!(line_end, 11);
        // One byte of terminator ("\n").
        assert_eq!(next_line, 12);
    }

    /// CRLF endings: the '\r' is excluded from the content.
    #[test]
    fn unfolded_line_end_handles_crlf() {
        let raw = b"Subject: hi\r\nFrom: x\r\n";
        let (line_end, next_line) = find_unfolded_line_end(raw, 0).unwrap();
        // Content stops before the '\r'.
        assert_eq!(line_end, 11);
        // Two bytes of terminator ("\r\n").
        assert_eq!(next_line, 13);
    }

    /// A continuation line (leading WSP) stays part of the same header.
    #[test]
    fn folded_line_keeps_both_lines_in_one_header() {
        // Offsets: "Subject: first" = 0..14, "\r\n" = 14..16,
        // " second" = 16..23, "\r\n" = 23..25, "From: x" = 25..32.
        let raw = b"Subject: first\r\n second\r\nFrom: x\r\n";

        let (line_end, next_line) = find_unfolded_line_end(raw, 0).unwrap();
        // The logical line runs through the continuation and stops just
        // before the '\r' of the terminating CRLF.
        assert!(raw[..line_end].ends_with(b"second"));
        // The next line begins right after that CRLF.
        assert_eq!(next_line, 25);

        // Scanning again from there yields the following header.
        let (second_end, _) = find_unfolded_line_end(raw, next_line).unwrap();
        assert_eq!(&raw[next_line..second_end], b"From: x");
    }

    /// The common "Name: value" shape.
    #[test]
    fn parse_simple_header() {
        let raw = b"Subject: hello\r\n";
        let header = parse_header_line(raw, 0, 14).unwrap();
        assert_eq!(header.name, "Subject");
        assert_eq!(header.value, b"hello");
    }

    /// A tab after the colon is trimmed just like a space.
    #[test]
    fn parse_header_with_tab_after_colon() {
        let raw = b"X-Custom:\thi\r\n";
        let header = parse_header_line(raw, 0, 12).unwrap();
        assert_eq!(header.name, "X-Custom");
        assert_eq!(header.value, b"hi");
    }

    /// No whitespace after the colon: the value starts immediately.
    #[test]
    fn parse_header_no_space_after_colon() {
        let raw = b"X:hi\r\n";
        let header = parse_header_line(raw, 0, 4).unwrap();
        assert_eq!(header.name, "X");
        assert_eq!(header.value, b"hi");
    }

    /// A line with no colon is not a header.
    #[test]
    fn parse_header_without_colon_returns_none() {
        let raw = b"malformed\r\n";
        assert!(parse_header_line(raw, 0, 9).is_none());
    }
}