use std::io::Read;
use std::time::Duration;
use thiserror::Error;
/// Overall per-request timeout (30 seconds), passed as the global timeout to
/// the `ureq` agents built by the fetch helpers below.
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
/// Outcome of an HTTP download: the bytes that were read plus metadata taken
/// from the response.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchResult {
    /// Body bytes that were read. May be a prefix of the resource when a byte
    /// limit was applied.
    pub data: Vec<u8>,
    /// `true` when the server answered a ranged request with HTTP 206;
    /// `false` for plain (non-ranged) downloads.
    // Lint silenced because nothing visible in this file reads the field;
    // NOTE(review): presumably kept for callers/diagnostics — confirm.
    #[allow(dead_code)]
    pub range_supported: bool,
    /// Size reported by the server's `Content-Range` or `Content-Length`
    /// header, when one was present and parseable.
    // Lint silenced for the same reason as `range_supported`.
    #[allow(dead_code)]
    pub content_length: Option<u64>,
}
/// Errors produced by the HTTP fetch helpers in this file.
#[derive(Error, Debug)]
pub enum HttpError {
/// The URL was rejected before any request was made (it did not start with
/// `http://` or `https://`). Carries a human-readable explanation.
#[error("Invalid URL: {0}")]
InvalidUrl(String),
/// The server replied with an error status code; built from
/// `ureq::Error::StatusCode`.
#[error("HTTP error {status}: {message}")]
HttpStatus { status: u16, message: String },
/// Any other `ureq` failure, stored as its stringified message.
#[error("Network error: {0}")]
Network(String),
/// An `std::io::Error`, converted automatically via `?` (for example while
/// reading the response body).
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
}
impl From<ureq::Error> for HttpError {
fn from(err: ureq::Error) -> Self {
match err {
ureq::Error::StatusCode(code) => HttpError::HttpStatus {
status: code,
message: format!("Server returned status {code}"),
},
_ => HttpError::Network(err.to_string()),
}
}
}
/// Downloads `url`, optionally limited to the first `max_bytes` bytes.
///
/// When a limit is given, a ranged request is tried first and its result is
/// returned as-is, whether or not the server honoured the range. Only an
/// HTTP 416 reply to the ranged request triggers a fallback to a plain GET
/// (still truncated to the limit). Without a limit a plain GET is issued
/// directly.
///
/// # Errors
///
/// Returns [`HttpError::InvalidUrl`] when `url` does not start with
/// `http://` or `https://`; otherwise propagates whatever the underlying
/// request helper reports.
pub fn fetch_url(url: &str, max_bytes: Option<usize>) -> Result<FetchResult, HttpError> {
    let has_http_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| url.starts_with(*scheme));
    if !has_http_scheme {
        return Err(HttpError::InvalidUrl(format!(
            "URL must start with http:// or https://: {url}"
        )));
    }

    match max_bytes {
        None => fetch_full(url, None),
        Some(limit) => match fetch_with_range(url, limit) {
            // 416 Range Not Satisfiable: retry without the Range header.
            Err(HttpError::HttpStatus { status: 416, .. }) => fetch_full(url, max_bytes),
            // Success (ranged or not) and every other error pass straight through.
            outcome => outcome,
        },
    }
}
/// Requests the first `bytes` bytes of `url` using an HTTP `Range` header.
///
/// The body is read through a `take(bytes)` limiter, so at most `bytes` bytes
/// are returned even when the server ignores the range and sends the whole
/// resource.
///
/// In the returned [`FetchResult`]:
/// * `range_supported` is `true` only when the status is 206.
/// * `content_length` is the total after the `/` in `Content-Range` when it
///   parses as a number. Otherwise, for non-206 replies only, it is the
///   `Content-Length` header. On a 206 reply `Content-Length` describes just
///   the returned slice, so it is not used as a stand-in for the total.
///
/// # Errors
///
/// Propagates request failures (converted from `ureq::Error`) and I/O errors
/// raised while reading the body.
fn fetch_with_range(url: &str, bytes: usize) -> Result<FetchResult, HttpError> {
    // Upper bound on the up-front buffer reservation. `bytes` comes from the
    // caller and may be huge (e.g. `usize::MAX`); reserving it verbatim would
    // panic or abort on allocation. `read_to_end` grows the buffer as needed.
    const MAX_PREALLOC: usize = 1024 * 1024;

    // Range ends are inclusive, hence `bytes - 1`. For `bytes == 0` this still
    // asks for one byte, but the `take(0)` below reads none of it.
    let range_header = format!("bytes=0-{}", bytes.saturating_sub(1));
    let config = ureq::Agent::config_builder()
        .timeout_global(Some(DEFAULT_TIMEOUT))
        .build();
    let agent = ureq::Agent::new_with_config(config);
    let response = agent.get(url).header("Range", &range_header).call()?;

    let status = response.status();
    let range_supported = status == 206;

    let headers = response.headers();
    // `Content-Range: bytes 0-99/1234` -> 1234. An unknown total (`*`) or a
    // value without a `/` yields `None`.
    let total_from_range = headers.get("Content-Range").and_then(|value| {
        let (_, total) = value.to_str().ok()?.rsplit_once('/')?;
        total.parse::<u64>().ok()
    });
    let content_length = total_from_range.or_else(|| {
        if range_supported {
            // Partial reply: Content-Length is the slice size, not the total.
            None
        } else {
            headers
                .get("Content-Length")
                .and_then(|value| value.to_str().ok()?.parse::<u64>().ok())
        }
    });

    let mut data = Vec::with_capacity(bytes.min(MAX_PREALLOC));
    // usize -> u64 is lossless on every target Rust supports (usize <= 64 bits).
    let limit = bytes as u64;
    response
        .into_body()
        .into_reader()
        .take(limit)
        .read_to_end(&mut data)?;

    Ok(FetchResult {
        data,
        range_supported,
        content_length,
    })
}
/// Downloads `url` with a plain GET (no `Range` header).
///
/// * With `max_bytes = Some(n)`, at most `n` bytes of the body are read.
/// * With `max_bytes = None`, the body is read up to a hard cap of 1 GiB. If
///   more data follows the cap, a warning is printed to stderr and the
///   truncated sample is returned.
///
/// The returned [`FetchResult`] always has `range_supported == false`;
/// `content_length` is the parsed `Content-Length` header, if any.
///
/// # Errors
///
/// Propagates request failures (converted from `ureq::Error`) and I/O errors
/// raised while reading the body.
fn fetch_full(url: &str, max_bytes: Option<usize>) -> Result<FetchResult, HttpError> {
    // Hard ceiling on how much is read when the caller supplies no limit.
    const UNBOUNDED_READ_CAP: u64 = 1024 * 1024 * 1024;
    // Upper bound on the up-front buffer reservation. `max_bytes` comes from
    // the caller and may be huge (e.g. `usize::MAX`); reserving it verbatim
    // would panic or abort on allocation. `read_to_end` grows as needed.
    const MAX_PREALLOC: usize = 1024 * 1024;

    let config = ureq::Agent::config_builder()
        .timeout_global(Some(DEFAULT_TIMEOUT))
        .build();
    let agent = ureq::Agent::new_with_config(config);
    let response = agent.get(url).call()?;

    let content_length = response
        .headers()
        .get("Content-Length")
        .and_then(|value| value.to_str().ok()?.parse::<u64>().ok());

    let mut reader = response.into_body().into_reader();

    let data = match max_bytes {
        Some(limit) => {
            let mut buf = Vec::with_capacity(limit.min(MAX_PREALLOC));
            // usize -> u64 is lossless on every target Rust supports.
            reader.take(limit as u64).read_to_end(&mut buf)?;
            buf
        }
        None => {
            let mut buf = Vec::new();
            reader
                .by_ref()
                .take(UNBOUNDED_READ_CAP)
                .read_to_end(&mut buf)?;
            // Exactly at the cap: probe one more byte to tell "body ended on
            // the boundary" apart from "body was truncated".
            if buf.len() as u64 == UNBOUNDED_READ_CAP {
                let mut probe = [0u8; 1];
                if reader.read(&mut probe)? > 0 {
                    eprintln!(
                        "warning: HTTP response exceeds 1 GB; sniffing on truncated sample — results may be inaccurate"
                    );
                }
            }
            buf
        }
    };

    Ok(FetchResult {
        data,
        range_supported: false,
        content_length,
    })
}