use std::io::{Read, Write};
use std::net::TcpStream;
use std::time::Duration;
use crate::error::{Error, Result};
use crate::url::Url;
/// Timeout applied to individual socket operations performed by `fetch`
/// (installed as both the read and the write timeout on the TCP stream).
const IO_TIMEOUT: Duration = Duration::from_secs(30);
/// Largest response body, in bytes, that `read_capped` will return (64 MiB);
/// anything longer is rejected as a bad response.
const MAX_RESPONSE_BYTES: u64 = 64 * 1024 * 1024;
/// Fetches the resource named by a gopher `url` and returns the raw response bytes.
///
/// The request line is built from `url.path` by `selector_from_path`, terminated
/// with CRLF, and written to `url.host:url.port`. The response is then read to
/// end-of-stream by `read_capped`, which enforces `MAX_RESPONSE_BYTES`.
/// When `url.is_tls()` is true the TCP stream is first handed to
/// `crate::tls::connect_over` together with the host name.
///
/// Connecting, reading and writing are each bounded by `IO_TIMEOUT`.
///
/// # Errors
///
/// * `Error::InvalidUrl` if the path contains control bytes (checked before
///   any socket is opened).
/// * `Error::BadResponse` if the response exceeds `MAX_RESPONSE_BYTES`.
/// * Any I/O or TLS error raised while connecting, writing or reading.
pub fn fetch(url: &Url) -> Result<Vec<u8>> {
    // Validate the selector first so a malformed URL never touches the network.
    let selector = selector_from_path(&url.path)?;

    let addr = format!("{}:{}", url.host, url.port);
    let tcp = connect_with_timeout(&addr)?;
    tcp.set_read_timeout(Some(IO_TIMEOUT))?;
    tcp.set_write_timeout(Some(IO_TIMEOUT))?;

    // A gopher request is just the selector line followed by CRLF.
    let mut request = Vec::with_capacity(selector.len() + 2);
    request.extend_from_slice(selector.as_bytes());
    request.extend_from_slice(b"\r\n");

    if url.is_tls() {
        let mut tls = crate::tls::connect_over(tcp, &url.host)?;
        tls.write_all(&request)?;
        tls.flush()?;
        read_capped(&mut tls)
    } else {
        let mut sock = tcp;
        sock.write_all(&request)?;
        sock.flush()?;
        read_capped(&mut sock)
    }
}

/// Resolves `addr` (`host:port`) and connects to the first address that
/// accepts, bounding every attempt by `IO_TIMEOUT`.
///
/// `TcpStream::connect` has no timeout of its own, so an unresponsive host
/// could otherwise block the caller for the operating system's default
/// connect timeout. Name resolution itself is still unbounded.
///
/// Returns the error of the last failed attempt, or an `InvalidInput` error
/// when resolution yields no addresses at all.
fn connect_with_timeout(addr: &str) -> std::io::Result<TcpStream> {
    let mut last_err = None;
    for candidate in std::net::ToSocketAddrs::to_socket_addrs(addr)? {
        match TcpStream::connect_timeout(&candidate, IO_TIMEOUT) {
            Ok(stream) => return Ok(stream),
            Err(err) => last_err = Some(err),
        }
    }
    Err(last_err.unwrap_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "could not resolve to any addresses",
        )
    }))
}
fn read_capped<R: Read>(reader: &mut R) -> Result<Vec<u8>> {
let mut buf = Vec::new();
let n = reader.take(MAX_RESPONSE_BYTES + 1).read_to_end(&mut buf)?;
if n as u64 > MAX_RESPONSE_BYTES {
return Err(Error::BadResponse(format!(
"gopher: response exceeds {MAX_RESPONSE_BYTES} bytes"
)));
}
Ok(buf)
}
/// Turns the path component of a gopher URL into the request line to send
/// (without the trailing CRLF).
///
/// The optional leading `/` and the single item-type character that follows
/// it are dropped. The remainder is split into selector and search string at
/// the first `?`, or, when there is no `?`, at the first tab. If a search
/// string is present the result is `selector<TAB>search`; otherwise it is
/// just the selector.
///
/// # Errors
///
/// Returns `Error::InvalidUrl` if either the selector or the search string
/// contains an ASCII control byte, which keeps CR/LF/TAB/NUL from being
/// smuggled into the request line.
fn selector_from_path(path: &str) -> Result<String> {
    let has_control = |text: &str| text.bytes().any(|byte| byte.is_ascii_control());

    let trimmed = match path.strip_prefix('/') {
        Some(rest) => rest,
        None => path,
    };
    // Skip the item-type character (one full `char`, not one byte).
    let rest = trimmed
        .chars()
        .next()
        .map_or("", |item_type| &trimmed[item_type.len_utf8()..]);

    // `?` takes precedence as the separator; a literal tab is the fallback.
    let parts = rest.split_once('?').or_else(|| rest.split_once('\t'));
    let (selector, query) = match parts {
        Some((head, tail)) => (head, Some(tail)),
        None => (rest, None),
    };

    if has_control(selector) {
        return Err(Error::InvalidUrl(format!(
            "gopher: control byte in selector of path '{path}'"
        )));
    }

    let search = match query {
        Some(search) => search,
        None => return Ok(selector.to_owned()),
    };
    if has_control(search) {
        return Err(Error::InvalidUrl(format!(
            "gopher: control byte in search query of path '{path}'"
        )));
    }

    Ok([selector, search].join("\t"))
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- selector_from_path: plain selectors -------------------------------

    #[test]
    fn selector_root_slash() {
        assert_eq!(selector_from_path("/").unwrap(), "");
    }

    #[test]
    fn selector_empty() {
        assert_eq!(selector_from_path("").unwrap(), "");
    }

    #[test]
    fn selector_just_item_type() {
        assert_eq!(selector_from_path("/1").unwrap(), "");
    }

    #[test]
    fn selector_text_file() {
        assert_eq!(selector_from_path("/0foo").unwrap(), "foo");
    }

    #[test]
    fn selector_directory_with_subpath() {
        assert_eq!(selector_from_path("/1docs/index").unwrap(), "docs/index");
    }

    // ---- selector_from_path: request-line injection ------------------------

    #[test]
    fn selector_rejects_crlf_injection() {
        assert!(selector_from_path("/0foo\r\nbar").is_err());
        assert!(selector_from_path("/0foo\nbar").is_err());
        assert!(selector_from_path("/0foo\rbar").is_err());
    }

    #[test]
    fn selector_rejects_nul_and_control_bytes() {
        assert!(selector_from_path("/0foo\0bar").is_err());
        assert!(selector_from_path("/0foo\x07bar").is_err());
    }

    // ---- selector_from_path: search queries --------------------------------

    #[test]
    fn search_type7_joins_selector_and_query_with_tab() {
        assert_eq!(selector_from_path("/7find?cats").unwrap(), "find\tcats");
    }

    #[test]
    fn search_query_with_empty_selector() {
        assert_eq!(selector_from_path("/7?cats").unwrap(), "\tcats");
    }

    #[test]
    fn search_query_works_for_any_item_type() {
        assert_eq!(selector_from_path("/1dir?term").unwrap(), "dir\tterm");
    }

    #[test]
    fn search_query_with_multiple_words() {
        assert_eq!(
            selector_from_path("/7find?big+cats").unwrap(),
            "find\tbig+cats"
        );
    }

    #[test]
    fn non_search_selector_has_no_trailing_tab() {
        let line = selector_from_path("/0foo").unwrap();
        assert_eq!(line, "foo");
        assert!(!line.contains('\t'));
    }

    #[test]
    fn search_only_first_question_mark_is_the_separator() {
        assert_eq!(selector_from_path("/7a?b?c").unwrap(), "a\tb?c");
    }

    #[test]
    fn search_literal_tab_selector_still_works() {
        assert_eq!(selector_from_path("/7find\tcats").unwrap(), "find\tcats");
    }

    #[test]
    fn search_query_rejects_crlf_and_nul() {
        assert!(selector_from_path("/7find?a\r\nb").is_err());
        assert!(selector_from_path("/7find?a\nb").is_err());
        assert!(selector_from_path("/7find?a\rb").is_err());
        assert!(selector_from_path("/7find?a\0b").is_err());
    }

    #[test]
    fn search_query_rejects_embedded_tab() {
        assert!(selector_from_path("/7find?a\tb").is_err());
    }

    // ---- read_capped -------------------------------------------------------

    #[test]
    fn read_capped_returns_small_response_unchanged() {
        let data = vec![b'x'; 1024];
        let mut cur = std::io::Cursor::new(data.clone());
        assert_eq!(read_capped(&mut cur).unwrap(), data);
    }

    #[test]
    fn read_capped_accepts_response_at_limit() {
        // Exactly MAX_RESPONSE_BYTES must still be accepted; this pins the
        // boundary so a `>` / `>=` slip in `read_capped` is caught.
        // The input is streamed rather than allocated up front.
        let mut src = std::io::repeat(b'x').take(MAX_RESPONSE_BYTES);
        let body = read_capped(&mut src).unwrap();
        assert_eq!(body.len() as u64, MAX_RESPONSE_BYTES);
        assert_eq!(body.first(), Some(&b'x'));
        assert_eq!(body.last(), Some(&b'x'));
    }

    #[test]
    fn read_capped_rejects_oversized_response() {
        // One byte over the cap is the smallest input that must be refused.
        let mut src = std::io::repeat(0u8).take(MAX_RESPONSE_BYTES + 1);
        let err = read_capped(&mut src).unwrap_err();
        assert!(matches!(err, Error::BadResponse(_)));
    }
}