use super::{categorize_reqwest_error, check_size, process_content, WebFetchOutput};
use crate::error::{ToolError, ToolResult};
use std::io::Read;
use std::mem::MaybeUninit;
use std::time::Duration;
pub fn fetch_url(
client: &reqwest::blocking::Client,
url: &str,
timeout: Duration,
) -> ToolResult<WebFetchOutput> {
let mut response = client
.get(url)
.timeout(timeout)
.send()
.map_err(|e| categorize_reqwest_error(e, url))?;
let status = response.status();
if !status.is_success() {
return Err(ToolError::Http(format!("HTTP {} for {}", status, url)));
}
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.and_then(|v| v.to_str().ok())
.unwrap_or("text/plain")
.to_string();
let content_length = response.content_length().map(|len| len as usize);
if let Some(len) = content_length {
check_size(len, url)?;
}
let mut bytes = content_length.map_or_else(Vec::new, Vec::with_capacity);
let mut total_len: usize = 0;
let mut buffer = [MaybeUninit::<u8>::uninit(); 8192];
let buffer_ptr = buffer.as_mut_ptr() as *mut u8;
let buffer_len = buffer.len();
loop {
let n = {
let buf = unsafe { std::slice::from_raw_parts_mut(buffer_ptr, buffer_len) };
response
.read(buf)
.map_err(|e| ToolError::Http(e.to_string()))?
};
if n == 0 {
break;
}
total_len += n;
check_size(total_len, url)?;
let initialized = unsafe { std::slice::from_raw_parts(buffer_ptr, n) };
bytes.extend_from_slice(initialized);
}
let byte_length = total_len;
let raw_content = String::from_utf8_lossy(&bytes);
let content = process_content(&raw_content, &content_type);
Ok(WebFetchOutput {
content,
content_type,
byte_length,
})
}
#[cfg(test)]
mod tests {
use super::*;
fn test_client() -> reqwest::blocking::Client {
reqwest::blocking::Client::builder()
.build()
.expect("client build failed")
}
#[test]
fn fetches_plain_text() {
let client = test_client();
let result = fetch_url(
&client,
"https://httpbin.org/robots.txt",
Duration::from_secs(10),
);
if let Ok(output) = result {
assert!(!output.content.is_empty());
assert!(!output.content_type.is_empty());
}
}
#[test]
fn handles_404() {
let client = test_client();
let result = fetch_url(
&client,
"https://httpbin.org/status/404",
Duration::from_secs(10),
);
if let Err(e) = result {
assert!(matches!(e, ToolError::Http(_)));
}
}
}