rsurl 0.0.6

A pure-Rust implementation of curl. Library, C FFI, and CLI for HTTP/HTTPS/FTP/FTPS.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
//! POP3 and POP3S support.
//!
//! Specs: RFC 1939 (POP3), RFC 2595 (POP3 over TLS / STLS), RFC 8314
//! (implicit TLS on port 995 for POP3S), RFC 2384 (`pop3:` URL scheme).
//!
//! URLs: `pop3://user:pass@host/` (LIST), `pop3://user:pass@host/1` (RETR 1).
//! For POP3S, connect with TLS from the start via
//! [`crate::tls::connect_over`].

use crate::net::NetStream;
use std::io::{BufRead, BufReader, Read, Write};

use crate::error::{Error, Result};
use crate::tls::{connect_over, TlsStream};
use crate::url::Url;

/// Upper bound on a multi-line POP3 response body (LIST output or a RETR'd
/// message). A server that never sends the `.\r\n` terminator — or a single
/// newline-less line — would otherwise grow our buffer without limit, a cheap
/// memory-exhaustion DoS. 64 MiB matches the crate's other body caps (e.g.
/// imap, rtsp, websocket, gopher) and is generous enough for legitimate large
/// mailboxes and messages.
const MAX_RESPONSE_BYTES: usize = 64 * 1024 * 1024;

/// USER + PASS auth, then either LIST mailboxes (if no message number in
/// path) or RETR a specific message. Returns the raw bytes (RFC 5322 message
/// or the textual LIST output).
pub fn fetch(url: &Url) -> Result<Vec<u8>> {
    fetch_with(url, &crate::net::NetConfig::default())
}

pub(crate) fn fetch_with(url: &Url, cfg: &crate::net::NetConfig) -> Result<Vec<u8>> {
    let userinfo = url
        .userinfo
        .as_deref()
        .ok_or_else(|| Error::BadResponse("pop3: missing userinfo".into()))?;
    let (user, pass) = split_userinfo(userinfo);

    let action = parse_path(&url.path)
        .ok_or_else(|| Error::InvalidUrl(format!("pop3 path: {}", url.path)))?;

    let tcp = cfg.connect(&url.host, url.port)?;
    if url.is_tls() {
        let tls = connect_over(tcp, &url.host)?;
        let mut session = Session::new(BufReader::new(IoAdapter::Tls(Box::new(tls))));
        run(&mut session, user, pass, action)
    } else {
        let mut session = Session::new(BufReader::new(IoAdapter::Plain(tcp)));
        run(&mut session, user, pass, action)
    }
}

/// What the URL path asks us to do once we're authenticated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Action {
    List,
    Retr(u32),
}

/// Split `"user[:pass]"` on the first `:`. Missing password becomes empty,
/// matching what curl does — RFC 1939's USER/PASS commands always need both
/// arguments, so we still send `PASS ""` for a userinfo with no colon.
fn split_userinfo(s: &str) -> (&str, &str) {
    match s.find(':') {
        Some(i) => (&s[..i], &s[i + 1..]),
        None => (s, ""),
    }
}

/// Reject a URL-derived string containing CR, LF, NUL, or any other ASCII
/// control byte before it's interpolated into a POP3 command. `what` names the
/// field for the error message.
fn reject_ctl(s: &str, what: &str) -> Result<()> {
    if let Some(b) = s.bytes().find(|b| *b < 0x20 || *b == 0x7f) {
        return Err(Error::BadResponse(format!(
            "pop3: {what} contains illegal control byte {b:#04x}"
        )));
    }
    Ok(())
}

/// Decide between LIST and RETR based on the URL path. Returns `None` for
/// anything that doesn't fit `/`, `""`, or `/<positive integer>`.
fn parse_path(path: &str) -> Option<Action> {
    let trimmed = path.strip_prefix('/').unwrap_or(path);
    if trimmed.is_empty() {
        return Some(Action::List);
    }
    // Reject anything with sub-paths or query — POP3 URLs are flat.
    if trimmed.contains('/') || trimmed.contains('?') || trimmed.contains('#') {
        return None;
    }
    trimmed.parse::<u32>().ok().map(Action::Retr)
}

/// Reverse the dot-stuffing applied to multi-line POP3 responses per
/// RFC 1939 §3.5: any line that begins with `.` had an extra `.` prepended
/// at transmission, so we strip the leading byte on receipt. Operates on
/// the body as already-collected bytes (terminator `.\r\n` removed).
fn un_dot_stuff(body: &[u8]) -> Vec<u8> {
    // Walk line-by-line, where lines end with `\r\n` (POP3 wire format).
    // A line is exactly the bytes between `\r\n` markers; trailing data
    // without a final CRLF is passed through unchanged.
    let mut out = Vec::with_capacity(body.len());
    let mut i = 0;
    let mut at_line_start = true;
    while i < body.len() {
        if at_line_start && body[i] == b'.' {
            // Drop this dot — it's the stuffing byte.
            i += 1;
            at_line_start = false;
            continue;
        }
        let b = body[i];
        out.push(b);
        i += 1;
        at_line_start = b == b'\n';
    }
    out
}

/// Read+Write transport, either plain or TLS-wrapped. Enum keeps the rest
/// of the protocol code monomorphic and lets us treat both legs the same.
/// `Tls` is boxed because the active TLS backend can be either purecrypto
/// (small) or rustls (~1 KiB), and clippy flags the resulting variant-size
/// mismatch against the bare `TcpStream` arm.
enum IoAdapter {
    Plain(Box<dyn NetStream>),
    Tls(Box<TlsStream<Box<dyn NetStream>>>),
}

impl Read for IoAdapter {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        match self {
            IoAdapter::Plain(s) => s.read(buf),
            IoAdapter::Tls(s) => s.read(buf),
        }
    }
}

impl Write for IoAdapter {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        match self {
            IoAdapter::Plain(s) => s.write(buf),
            IoAdapter::Tls(s) => s.write(buf),
        }
    }
    fn flush(&mut self) -> std::io::Result<()> {
        match self {
            IoAdapter::Plain(s) => s.flush(),
            IoAdapter::Tls(s) => s.flush(),
        }
    }
}

/// Buffered POP3 conversation. Wraps a transport in a `BufReader` so we can
/// pull whole CRLF-terminated lines without an extra allocation per byte.
struct Session<R: Read + Write> {
    io: BufReader<R>,
}

impl<R: Read + Write> Session<R> {
    fn new(io: BufReader<R>) -> Self {
        Self { io }
    }

    /// Send `cmd` followed by CRLF. The underlying writer lives behind the
    /// BufReader, so we have to reach through `get_mut` to write.
    ///
    /// Refuses any command line containing CR, LF, or NUL: the CRLF terminator
    /// is appended here, so an embedded CR/LF in URL-derived `user`/`pass`
    /// would otherwise inject extra POP3 commands.
    fn send(&mut self, cmd: &str) -> Result<()> {
        if cmd.bytes().any(|b| b == b'\r' || b == b'\n' || b == 0) {
            return Err(Error::BadResponse(
                "pop3: refusing to send command line with embedded CR/LF/NUL".into(),
            ));
        }
        let inner = self.io.get_mut();
        inner.write_all(cmd.as_bytes())?;
        inner.write_all(b"\r\n")?;
        inner.flush()?;
        Ok(())
    }

    /// Read a single CRLF-terminated reply line, trim the CRLF, and return
    /// it as a UTF-8 string. POP3 status lines are ASCII per the RFC.
    fn read_line(&mut self) -> Result<String> {
        let mut buf = Vec::new();
        let n = self.io.read_until(b'\n', &mut buf)?;
        if n == 0 {
            return Err(Error::UnexpectedEof);
        }
        // Strip trailing \r\n or \n.
        while matches!(buf.last(), Some(b'\n') | Some(b'\r')) {
            buf.pop();
        }
        String::from_utf8(buf).map_err(|_| Error::BadResponse("pop3: non-UTF8 status line".into()))
    }

    /// Read a status line and require it to start with `+OK`. Returns the
    /// text after `+OK` (with the leading space dropped) on success, or
    /// maps `-ERR` replies to `Error::BadResponse`.
    fn read_status(&mut self) -> Result<String> {
        let line = self.read_line()?;
        if let Some(rest) = line.strip_prefix("+OK") {
            // RFC 1939: "+OK" is followed by optional text, usually after a
            // single space. Trim that delimiter so callers see only the text.
            Ok(rest.strip_prefix(' ').unwrap_or(rest).to_string())
        } else if let Some(rest) = line.strip_prefix("-ERR") {
            let text = rest.strip_prefix(' ').unwrap_or(rest);
            Err(Error::BadResponse(format!("pop3: {text}")))
        } else {
            Err(Error::BadResponse(format!(
                "pop3: unexpected status line: {line}"
            )))
        }
    }

    /// Read a multi-line response body terminated by a line containing only
    /// `.` (CRLF.CRLF). Returns the body bytes with the terminator stripped
    /// but with all dot-stuffing still in place — callers decide whether to
    /// un-stuff (RETR) or pass through verbatim (LIST is also dot-stuffed,
    /// but stuffing only triggers when a line begins with `.`, which the
    /// LIST grammar forbids, so it's a no-op in practice).
    fn read_multiline(&mut self) -> Result<Vec<u8>> {
        let mut out = Vec::new();
        loop {
            // Bound each line read: `read_until` is otherwise unbounded, so a
            // server that sends a single line with no `\n` would make us
            // allocate forever. Cap the per-line reader at the remaining
            // response budget plus one byte, so we can distinguish "line is
            // exactly at the limit" from "line overran the limit".
            let mut line = Vec::new();
            let line_cap = MAX_RESPONSE_BYTES - out.len();
            let n = (&mut self.io)
                .take(line_cap as u64 + 1)
                .read_until(b'\n', &mut line)?;
            if n == 0 {
                return Err(Error::UnexpectedEof);
            }
            // A terminator line is exactly `.\r\n`, or `.\n` for servers
            // that play loose with CRLF. Detect before pushing.
            let is_terminator = matches!(line.as_slice(), b".\r\n" | b".\n");
            if is_terminator {
                return Ok(out);
            }
            // Aggregate cap: refuse to buffer past MAX_RESPONSE_BYTES, whether
            // the overrun comes from one giant newline-less line or from a
            // body that never terminates with `.\r\n`.
            if line.len() > line_cap {
                return Err(Error::BadResponse(format!(
                    "pop3: response exceeds maximum {MAX_RESPONSE_BYTES} bytes"
                )));
            }
            out.extend_from_slice(&line);
        }
    }
}

/// Drive the actual POP3 conversation once we have a Session ready.
fn run<R: Read + Write>(
    session: &mut Session<R>,
    user: &str,
    pass: &str,
    action: Action,
) -> Result<Vec<u8>> {
    // 1. Read greeting.
    session.read_status()?;

    // 2. Authenticate. We always send both USER and PASS; APOP and SASL are
    //    deferred (see module note). Reject control bytes in the URL-derived
    //    credentials up front so a CR/LF can't smuggle extra commands (`send`
    //    also guards the assembled line as a backstop).
    reject_ctl(user, "pop3 user")?;
    reject_ctl(pass, "pop3 password")?;
    session.send(&format!("USER {user}"))?;
    session.read_status()?;
    session.send(&format!("PASS {pass}"))?;
    session.read_status()?;

    // 3. Perform the requested action.
    let payload = match action {
        Action::List => {
            session.send("LIST")?;
            session.read_status()?;
            // LIST responses are dot-stuffed in theory but every line starts
            // with a digit, so un-stuffing would be a no-op. Pass through.
            session.read_multiline()?
        }
        Action::Retr(n) => {
            session.send(&format!("RETR {n}"))?;
            session.read_status()?;
            let raw = session.read_multiline()?;
            un_dot_stuff(&raw)
        }
    };

    // 4. Polite shutdown. Errors here are tolerated — we already have the
    //    payload, and a half-broken server shouldn't fail the whole fetch.
    let _ = session.send("QUIT");
    let _ = session.read_status();

    Ok(payload)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn un_dot_stuff_strips_leading_dot_on_each_line() {
        let input = b"hello\r\n..dotted\r\n.line\r\nplain\r\n";
        let got = un_dot_stuff(input);
        assert_eq!(got, b"hello\r\n.dotted\r\nline\r\nplain\r\n");
    }

    #[test]
    fn un_dot_stuff_handles_empty_body() {
        assert_eq!(un_dot_stuff(b""), b"");
    }

    #[test]
    fn un_dot_stuff_handles_first_line_dot() {
        // A message whose very first line begins with `.` gets stuffed too.
        let input = b"..first\r\nbody\r\n";
        assert_eq!(un_dot_stuff(input), b".first\r\nbody\r\n");
    }

    #[test]
    fn un_dot_stuff_does_not_consume_dot_in_middle() {
        let input = b"a.b\r\n.x\r\n";
        // `.` after `a` is mid-line, untouched. Leading `.` on second line
        // is the stuffing byte and gets removed.
        assert_eq!(un_dot_stuff(input), b"a.b\r\nx\r\n");
    }

    #[test]
    fn un_dot_stuff_handles_trailing_partial_line() {
        // No final CRLF — should still process leading dots correctly.
        let input = b".end";
        assert_eq!(un_dot_stuff(input), b"end");
    }

    #[test]
    fn parse_path_root_means_list() {
        assert_eq!(parse_path("/"), Some(Action::List));
        assert_eq!(parse_path(""), Some(Action::List));
    }

    #[test]
    fn parse_path_numeric_means_retr() {
        assert_eq!(parse_path("/1"), Some(Action::Retr(1)));
        assert_eq!(parse_path("/42"), Some(Action::Retr(42)));
        assert_eq!(parse_path("/0"), Some(Action::Retr(0)));
    }

    #[test]
    fn parse_path_rejects_garbage() {
        assert_eq!(parse_path("/abc"), None);
        assert_eq!(parse_path("/1/2"), None);
        assert_eq!(parse_path("/1?x=1"), None);
        assert_eq!(parse_path("/-1"), None);
        assert_eq!(parse_path("/1.0"), None);
    }

    #[test]
    fn split_userinfo_splits_on_first_colon() {
        assert_eq!(split_userinfo("alice:secret"), ("alice", "secret"));
        assert_eq!(split_userinfo("alice"), ("alice", ""));
        assert_eq!(split_userinfo("alice:s:e:c"), ("alice", "s:e:c"));
        assert_eq!(split_userinfo(":pass"), ("", "pass"));
    }

    #[test]
    fn reject_ctl_flags_control_bytes() {
        assert!(reject_ctl("alice", "pop3 user").is_ok());
        assert!(reject_ctl("p@ss:word", "pop3 password").is_ok());
        assert!(reject_ctl("alice\r\nDELE 1", "pop3 user").is_err());
        assert!(reject_ctl("alice\npass", "pop3 user").is_err());
        assert!(reject_ctl("alice\0", "pop3 user").is_err());
        assert!(reject_ctl("alice\x7f", "pop3 user").is_err());
    }

    /// In-memory Read+Write used to drive `Session::send` in tests.
    struct MockIo {
        written: Vec<u8>,
    }
    impl Read for MockIo {
        fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
            Ok(0)
        }
    }
    impl Write for MockIo {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            self.written.extend_from_slice(buf);
            Ok(buf.len())
        }
        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    /// Read-only mock that replays a fixed byte stream, then EOFs. Used to
    /// drive `read_multiline` over the BufReader without a live socket.
    struct ReplayIo {
        data: std::io::Cursor<Vec<u8>>,
    }
    impl Read for ReplayIo {
        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
            self.data.read(buf)
        }
    }
    impl Write for ReplayIo {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            Ok(buf.len())
        }
        fn flush(&mut self) -> std::io::Result<()> {
            Ok(())
        }
    }

    fn session_replaying(data: Vec<u8>) -> Session<ReplayIo> {
        Session::new(BufReader::new(ReplayIo {
            data: std::io::Cursor::new(data),
        }))
    }

    #[test]
    fn read_multiline_returns_body_on_terminator() {
        let mut s = session_replaying(b"line one\r\nline two\r\n.\r\n".to_vec());
        let body = s.read_multiline().unwrap();
        assert_eq!(body, b"line one\r\nline two\r\n");
    }

    #[test]
    fn read_multiline_aborts_past_aggregate_cap() {
        // A body that never sends `.\r\n`: many terminated lines whose total
        // exceeds MAX_RESPONSE_BYTES must error rather than buffer forever.
        // Build just over the cap out of 1 KiB lines.
        let line = {
            let mut l = vec![b'a'; 1022];
            l.extend_from_slice(b"\r\n");
            l
        };
        let n_lines = MAX_RESPONSE_BYTES / line.len() + 2;
        let mut data = Vec::with_capacity(n_lines * line.len());
        for _ in 0..n_lines {
            data.extend_from_slice(&line);
        }
        // Note: no terminating `.\r\n`.
        let mut s = session_replaying(data);
        match s.read_multiline() {
            Err(Error::BadResponse(m)) => assert!(m.contains("maximum"), "got {m}"),
            other => panic!("expected BadResponse(maximum), got {other:?}"),
        }
    }

    #[test]
    fn read_multiline_aborts_on_unbounded_single_line() {
        // A single newline-less line larger than the cap must error, not grow
        // the buffer without limit.
        let data = vec![b'x'; MAX_RESPONSE_BYTES + 1024];
        let mut s = session_replaying(data);
        match s.read_multiline() {
            Err(Error::BadResponse(m)) => assert!(m.contains("maximum"), "got {m}"),
            other => panic!("expected BadResponse(maximum), got {other:?}"),
        }
    }

    #[test]
    fn send_rejects_embedded_crlf() {
        let mut s = Session::new(BufReader::new(MockIo {
            written: Vec::new(),
        }));
        assert!(matches!(
            s.send("USER alice\r\nPASS x"),
            Err(Error::BadResponse(_))
        ));
        assert!(matches!(s.send("USER a\nb"), Err(Error::BadResponse(_))));
        assert!(matches!(s.send("USER a\0b"), Err(Error::BadResponse(_))));
        // Clean command goes through with exactly one trailing CRLF.
        s.send("USER alice").unwrap();
        assert_eq!(s.io.get_ref().written, b"USER alice\r\n");
    }
}