use anyhow::{anyhow, Context, Result};
use once_cell::sync::Lazy;
use reqwest::blocking::{Client, ClientBuilder};
use std::fs::File;
use std::io::prelude::*;
use std::time::Duration;
use tracing::{event, Level};
use url::Url;
/// Shared blocking HTTP client used for remote resources.
///
/// The client is configured with a per-URL proxy lookup through `env_proxy`,
/// a one second connect timeout, a one second overall timeout, no `Referer`
/// header, and an `mdcat/<version>` user agent.
///
/// Holds `None` if the client could not be built; the failure is reported
/// once as an error-level tracing event.
static CLIENT: Lazy<Option<Client>> = Lazy::new(|| {
    let builder = ClientBuilder::new()
        .proxy(reqwest::Proxy::custom(|url| env_proxy::for_url(url).to_url()))
        .connect_timeout(Some(Duration::from_secs(1)))
        .timeout(Some(Duration::from_secs(1)))
        .referer(false)
        .user_agent(concat!("mdcat/", env!("CARGO_PKG_VERSION")));

    match builder.build() {
        Ok(client) => Some(client),
        Err(error) => {
            // Log the failure and carry on without a client; callers see
            // `None` and report their own error.
            event!(
                Level::ERROR,
                ?error,
                "Failed to initialize HTTP client: {}",
                error
            );
            None
        }
    }
});
/// Policy that decides which URLs may be read.
///
/// Checked through [`ResourceAccess::permits`].
#[derive(Debug, Copy, Clone)]
pub enum ResourceAccess {
/// Permit only URLs that `is_local` accepts, i.e. `file` URLs which
/// convert to a file path.
LocalOnly,
/// Permit every URL.
RemoteAllowed,
}
impl ResourceAccess {
    /// Whether this policy allows access to `url`.
    ///
    /// `RemoteAllowed` permits every URL, whereas `LocalOnly` permits a URL
    /// only if `is_local` accepts it.
    pub fn permits(self, url: &Url) -> bool {
        match self {
            ResourceAccess::RemoteAllowed => true,
            ResourceAccess::LocalOnly => is_local(url),
        }
    }
}
/// Whether `url` is a `file` URL that converts to a file path.
fn is_local(url: &Url) -> bool {
    if url.scheme() != "file" {
        return false;
    }
    url.to_file_path().is_ok()
}
/// Maximum number of bytes accepted from a single resource.
///
/// 104_857_600 bytes is 100 MiB (100 * 1024 * 1024).
static RESOURCE_READ_LIMIT: u64 = 104_857_600;
/// Fetch the body of `url` with the shared `CLIENT`.
///
/// # Errors
///
/// Fails if the HTTP client is not available, if the request cannot be
/// sent, if the response carries an error status, if reading the body
/// fails, or if the body is larger than `RESOURCE_READ_LIMIT`.
fn fetch_http(url: &Url) -> Result<Vec<u8>> {
    let client = CLIENT
        .as_ref()
        .with_context(|| "HTTP client not available".to_owned())?;
    let response = client
        .get(url.clone())
        .send()
        .with_context(|| format!("Failed to GET {url}"))?
        .error_for_status()?;

    if let Some(size) = response.content_length() {
        // The response reports its length, so an oversized body can be
        // rejected before reading any of it.
        if size > RESOURCE_READ_LIMIT {
            return Err(anyhow!(
                "{url} reports size {size} which exceeds limit {RESOURCE_READ_LIMIT}, refusing to read",
            ));
        }
        let mut body = vec![0_u8; size as usize];
        response
            .take(RESOURCE_READ_LIMIT)
            .read_exact(&mut body)
            .with_context(|| format!("Failed to read from {url}"))?;
        return Ok(body);
    }

    // Length unknown: read up to one byte past the limit, so that a body
    // which exceeds the limit can be told apart from one which exactly
    // fills it.
    let mut body = Vec::with_capacity(1_048_576);
    response
        .take(RESOURCE_READ_LIMIT + 1)
        .read_to_end(&mut body)
        .with_context(|| format!("Failed to read from {url}"))?;
    if body.len() as u64 > RESOURCE_READ_LIMIT {
        Err(anyhow!(
            "Contents of {url} exceeded {RESOURCE_READ_LIMIT}, rejected",
        ))
    } else {
        Ok(body)
    }
}
pub fn read_url(url: &Url, access: ResourceAccess) -> Result<Vec<u8>> {
if !access.permits(url) {
return Err(anyhow!(
"Access denied to URL {} by policy {:?}",
url,
access
));
}
match url.scheme() {
"file" => match url.to_file_path() {
Ok(path) => {
let mut buffer = Vec::new();
File::open(path)
.with_context(|| format!("Failed to open file at {url}"))?
.take(RESOURCE_READ_LIMIT + 1)
.read_to_end(&mut buffer)
.with_context(|| format!("Failed to read from file at {url}"))?;
if RESOURCE_READ_LIMIT < buffer.len() as u64 {
Err(anyhow!(
"Contents of {url} exceeded {RESOURCE_READ_LIMIT}, rejected",
))
} else {
Ok(buffer)
}
}
Err(_) => Err(anyhow!("Cannot convert URL {url} to file path")),
},
"http" | "https" => fetch_http(url),
_ => Err(anyhow!(
"Cannot read from URL {url}, protocol not supported",
)),
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parse a URL literal for a test, panicking if it is invalid.
    fn test_url(input: &str) -> Url {
        Url::parse(input).unwrap()
    }

    #[test]
    #[cfg(unix)]
    fn resource_access_permits_local_resource() {
        let local_file = test_url("file:///foo/bar");
        assert!(ResourceAccess::LocalOnly.permits(&local_file));
        assert!(ResourceAccess::RemoteAllowed.permits(&local_file));
    }

    #[test]
    #[cfg(unix)]
    fn resource_access_permits_remote_file_url() {
        // A file URL with a host part is only permitted when remote access is.
        let remote_file = test_url("file://example.com/foo/bar");
        assert!(!ResourceAccess::LocalOnly.permits(&remote_file));
        assert!(ResourceAccess::RemoteAllowed.permits(&remote_file));
    }

    #[test]
    fn resource_access_permits_https_url() {
        let remote = test_url("https:///foo/bar");
        assert!(!ResourceAccess::LocalOnly.permits(&remote));
        assert!(ResourceAccess::RemoteAllowed.permits(&remote));
    }

    #[test]
    fn read_url_with_http_url_fails_if_local_only_access() {
        let target = test_url("https://eu.httpbin.org/status/404");
        let denied = read_url(&target, ResourceAccess::LocalOnly).unwrap_err();
        assert_eq!(
            denied.to_string(),
            "Access denied to URL https://eu.httpbin.org/status/404 by policy LocalOnly"
        );
    }

    #[test]
    fn read_url_with_http_url_fails_when_status_404() {
        let target = test_url("https://eu.httpbin.org/status/404");
        let outcome = read_url(&target, ResourceAccess::RemoteAllowed);
        assert!(outcome.is_err(), "Unexpected success: {outcome:?}");
        let message = format!("{:#}", outcome.unwrap_err());
        assert_eq!(
            message,
            "HTTP status client error (404 Not Found) for url (https://eu.httpbin.org/status/404)"
        )
    }

    #[test]
    fn read_url_with_http_url_returns_content_when_status_200() {
        let target = test_url("https://eu.httpbin.org/bytes/100");
        let outcome = read_url(&target, ResourceAccess::RemoteAllowed);
        assert!(outcome.is_ok(), "Unexpected error: {outcome:?}");
        let body = outcome.unwrap();
        assert_eq!(body.len(), 100);
    }

    #[test]
    fn read_url_with_http_url_fails_when_size_limit_is_exceeded() {
        let target = test_url("https://eu.httpbin.org/response-headers?content-length=115343400");
        let outcome = read_url(&target, ResourceAccess::RemoteAllowed);
        assert!(outcome.is_err(), "Unexpected success: {outcome:?}");
        let message = format!("{:#}", outcome.unwrap_err());
        assert_eq!(message, "https://eu.httpbin.org/response-headers?content-length=115343400 reports size 115343400 which exceeds limit 104857600, refusing to read")
    }
}