use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::Duration;
use index_core::{FormMethod, FormSubmission, IndexUrl, UrlError};
use index_security::{ContentLimits, SecurityError, check_content_size, validate_redirect_chain};
use ureq::ResponseExt;
/// A request to fetch a single document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Request {
/// Address of the document to retrieve.
pub url: IndexUrl,
}
/// The textual result of a fetch or form submission.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Response {
/// URL the body was ultimately served from.
pub final_url: IndexUrl,
/// URLs recorded as redirect hops for this response.
pub redirects: Vec<IndexUrl>,
/// Raw content type of the response, if one was reported (may carry
/// parameters such as `charset`).
pub mime_type: Option<String>,
/// Response body as text.
pub body: String,
}
/// Failure modes of a fetch or form submission.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FetchError {
/// No fixture response is registered for the given key.
NotFound(String),
/// Network fetching is not implemented.
NetworkNotImplemented,
/// The network fetch failed; carries the underlying error text.
Network(String),
/// The network fetch timed out.
Timeout {
/// Timeout reported in the error message, in milliseconds.
timeout_ms: u64,
},
/// An HTTP status was reported as a failure.
HttpStatus {
/// HTTP status code.
status: u16,
/// URL the status was returned for.
url: String,
},
/// The response content type is not accepted; carries the content type text.
UnsupportedContentType(String),
/// A URL taken from the network response could not be parsed.
Url(UrlError),
/// The cache fallback failed; carries the cache error text.
Cache(String),
/// The fetch security policy rejected the response.
Security(SecurityError),
}
impl Display for FetchError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            // Variant without a payload.
            Self::NetworkNotImplemented => write!(f, "network fetching is not implemented yet"),

            // Variants carrying a single value that is appended to the message.
            Self::NotFound(url) => write!(f, "no fixture response registered for {url}"),
            Self::Network(error) => write!(f, "network fetch failed: {error}"),
            Self::UnsupportedContentType(mime_type) => {
                write!(f, "unsupported response content type: {mime_type}")
            }
            Self::Url(error) => write!(f, "network response URL is invalid: {error}"),
            Self::Cache(error) => write!(f, "cache fallback failed: {error}"),
            Self::Security(error) => write!(f, "fetch security policy rejected response: {error}"),

            // Struct-like variants.
            Self::Timeout { timeout_ms } => write!(f, "network fetch timed out after {timeout_ms}ms"),
            Self::HttpStatus { status, url } => write!(f, "HTTP status {status} returned for {url}"),
        }
    }
}
// Uses the default `Error` methods only; no `source()` is exposed for wrapped errors.
impl std::error::Error for FetchError {}
impl FetchError {
    /// Reports whether the failure is worth retrying or covering from cache.
    ///
    /// Only [`FetchError::Network`] and [`FetchError::Timeout`] are transient;
    /// every other variant is treated as permanent.
    #[must_use]
    pub fn is_transient(&self) -> bool {
        // Spelled out variant by variant so a newly added variant forces a decision here.
        match self {
            Self::Network(_) | Self::Timeout { .. } => true,
            Self::NotFound(_)
            | Self::NetworkNotImplemented
            | Self::HttpStatus { .. }
            | Self::UnsupportedContentType(_)
            | Self::Url(_)
            | Self::Cache(_)
            | Self::Security(_) => false,
        }
    }
}
/// Retry configuration: an attempt budget plus a list of backoff delays.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RetryPolicy {
/// Maximum number of attempts; `RetryPolicy::new` raises zero to one.
max_attempts: u8,
/// Backoff delays, stored in the order they were supplied.
backoff: Vec<Duration>,
}
impl RetryPolicy {
    /// Builds a policy from an attempt budget and backoff delays in milliseconds.
    ///
    /// A `max_attempts` of zero is raised to one so that at least one attempt
    /// is always allowed.
    #[must_use]
    pub fn new(max_attempts: u8, backoff_millis: impl IntoIterator<Item = u64>) -> Self {
        let mut backoff = Vec::new();
        for millis in backoff_millis {
            backoff.push(Duration::from_millis(millis));
        }
        let max_attempts = if max_attempts == 0 { 1 } else { max_attempts };
        Self {
            max_attempts,
            backoff,
        }
    }

    /// Returns the maximum number of attempts (always at least one when built via `new`).
    #[must_use]
    pub const fn max_attempts(&self) -> u8 {
        self.max_attempts
    }

    /// Returns the backoff delays in milliseconds, saturating at `u64::MAX`.
    #[must_use]
    pub fn backoff_millis(&self) -> Vec<u64> {
        let mut millis = Vec::with_capacity(self.backoff.len());
        for delay in &self.backoff {
            // `as_millis` yields u128; anything beyond u64 saturates rather than wrapping.
            millis.push(u64::try_from(delay.as_millis()).unwrap_or(u64::MAX));
        }
        millis
    }
}
impl Default for RetryPolicy {
    /// Three attempts with backoff delays of 0 ms, 100 ms and 250 ms.
    fn default() -> Self {
        const ATTEMPTS: u8 = 3;
        const BACKOFF_MILLIS: [u64; 3] = [0, 100, 250];
        Self::new(ATTEMPTS, BACKOFF_MILLIS)
    }
}
/// Failure modes of the on-disk response cache.
#[derive(Debug)]
pub enum CacheError {
/// A filesystem operation failed.
Io(std::io::Error),
/// The cache data is invalid; carries the reason.
Parse(String),
/// A URL stored in the cache is invalid.
Url(index_core::UrlError),
}
impl From<std::io::Error> for CacheError {
fn from(value: std::io::Error) -> Self {
Self::Io(value)
}
}
impl Display for CacheError {
    /// Writes a one-line description naming which part of the cache failed.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Parse(reason) => write!(f, "cache data is invalid: {reason}"),
            Self::Url(error) => write!(f, "cache URL is invalid: {error}"),
            Self::Io(error) => write!(f, "cache IO failed: {error}"),
        }
    }
}
impl std::error::Error for CacheError {}
/// Something that can retrieve the response for a [`Request`].
pub trait Fetcher {
/// Fetches the document described by `request`.
///
/// # Errors
/// Returns a [`FetchError`] when no response can be produced.
fn fetch(&self, request: &Request) -> Result<Response, FetchError>;
}
/// Something that can submit a form and return the resulting response.
pub trait FormSubmitter {
/// Submits `submission` and returns the response.
///
/// # Errors
/// Returns a [`FetchError`] when no response can be produced.
fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError>;
}
/// Fetcher and form submitter backed by a `ureq` agent.
#[derive(Debug, Clone)]
pub struct UreqFetcher {
/// Agent used for every request issued by this fetcher.
agent: ureq::Agent,
}
impl UreqFetcher {
    /// Creates a fetcher whose agent records redirect history, applies a
    /// 30-second global timeout and identifies itself as `Index/0.1.0`.
    #[must_use]
    pub fn new() -> Self {
        let config = ureq::Agent::config_builder()
            .save_redirect_history(true)
            .timeout_global(Some(Duration::from_secs(30)))
            .user_agent("Index/0.1.0")
            .build();
        Self {
            agent: config.into(),
        }
    }
}
impl Default for UreqFetcher {
fn default() -> Self {
Self::new()
}
}
impl Fetcher for UreqFetcher {
fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
let mut response = self
.agent
.get(request.url.as_str())
.call()
.map_err(|error| FetchError::Network(error.to_string()))?;
response_from_ureq(request.url.as_str(), &mut response)
}
}
impl FormSubmitter for UreqFetcher {
fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
match submission.method {
FormMethod::Get => self.fetch(&Request {
url: submission.action.clone(),
}),
FormMethod::Post => {
let body = submission.body.as_deref().unwrap_or_default();
let mut response = self
.agent
.post(submission.action.as_str())
.header("content-type", "application/x-www-form-urlencoded")
.send(body)
.map_err(|error| FetchError::Network(error.to_string()))?;
response_from_ureq(submission.action.as_str(), &mut response)
}
}
}
}
fn response_from_ureq(
requested_url: &str,
response: &mut ureq::http::Response<ureq::Body>,
) -> Result<Response, FetchError> {
let final_url = IndexUrl::parse(response.get_uri().to_string()).map_err(FetchError::Url)?;
let redirects = response
.get_redirect_history()
.unwrap_or(&[])
.iter()
.filter_map(|uri| {
let value = uri.to_string();
(value != requested_url && value != final_url.as_str()).then_some(value)
})
.map(IndexUrl::parse)
.collect::<Result<Vec<_>, _>>()
.map_err(FetchError::Url)?;
let mime_type = response
.headers()
.get("content-type")
.and_then(|value| value.to_str().ok())
.map(ToOwned::to_owned);
let body = response
.body_mut()
.read_to_string()
.map_err(|error| FetchError::Network(error.to_string()))?;
Ok(Response {
final_url,
redirects,
mime_type,
body,
})
}
/// Wrapper that validates every response of an inner fetcher or submitter.
#[derive(Debug, Clone)]
pub struct SecureFetcher<F> {
/// Wrapped fetcher or submitter that performs the actual work.
inner: F,
/// Limits handed to the redirect-chain and content-size checks.
limits: ContentLimits,
}
impl<F> SecureFetcher<F> {
#[must_use]
pub fn new(inner: F) -> Self {
Self {
inner,
limits: ContentLimits::default(),
}
}
#[must_use]
pub const fn with_limits(inner: F, limits: ContentLimits) -> Self {
Self { inner, limits }
}
#[must_use]
pub fn inner(&self) -> &F {
&self.inner
}
}
impl<F: Fetcher> Fetcher for SecureFetcher<F> {
    /// Fetches through the inner fetcher, then runs `validate_response` on the
    /// result using the request URL and this wrapper's limits.
    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
        self.inner
            .fetch(request)
            .and_then(|response| validate_response(&request.url, response, self.limits))
    }
}
impl<F: FormSubmitter> FormSubmitter for SecureFetcher<F> {
    /// Submits through the inner submitter, then runs `validate_response` on
    /// the result using the form action URL and this wrapper's limits.
    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
        self.inner
            .submit_form(submission)
            .and_then(|response| validate_response(&submission.action, response, self.limits))
    }
}
fn validate_response(
requested_url: &IndexUrl,
response: Response,
limits: ContentLimits,
) -> Result<Response, FetchError> {
validate_text_mime(response.mime_type.as_deref())?;
validate_redirect_chain(
requested_url,
&response.redirects,
&response.final_url,
limits,
)
.map_err(FetchError::Security)?;
check_content_size(&response.body, limits).map_err(FetchError::Security)?;
Ok(response)
}
impl<F: FormSubmitter> FormSubmitter for RetryingFetcher<F> {
    /// Submits the form, repeating the submission while it fails with a
    /// transient error and the policy's attempt budget is not exhausted.
    ///
    /// Retries are issued back to back; the policy's backoff delays are not
    /// consulted here.
    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
        // Every attempt except the last may be followed by another one.
        let retries = self.policy.max_attempts().saturating_sub(1);
        for _ in 0..retries {
            match self.inner.submit_form(submission) {
                Err(error) if error.is_transient() => {}
                settled => return settled,
            }
        }
        // Final attempt: its outcome is returned whatever it is.
        self.inner.submit_form(submission)
    }
}
impl<F: FormSubmitter> FormSubmitter for CacheFallbackFetcher<F> {
    /// Submits the form; on a transient failure, answers from the cache entry
    /// for the form's action URL instead.
    ///
    /// Successful responses are written to the cache on a best-effort basis
    /// (a failed write is ignored).
    ///
    /// # Errors
    /// * The original error when it is not transient, or when it is transient
    ///   and the cache has no entry.
    /// * [`FetchError::Cache`] when the cache lookup itself fails.
    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
        let error = match self.inner.submit_form(submission) {
            Ok(response) => {
                let _ = self.cache.store(&response);
                return Ok(response);
            }
            Err(error) => error,
        };
        if !error.is_transient() {
            return Err(error);
        }
        self.cache
            .load(&submission.action)
            .map_err(|cache_error| FetchError::Cache(cache_error.to_string()))?
            .ok_or(error)
    }
}
/// Accepts responses whose content type is absent, empty, or one of the
/// supported text/markup types.
///
/// Only the part before the first `;` is considered, after trimming
/// whitespace and lower-casing.
///
/// # Errors
/// Returns [`FetchError::UnsupportedContentType`] carrying the original,
/// unnormalized value for any other content type.
fn validate_text_mime(mime_type: Option<&str>) -> Result<(), FetchError> {
    const ACCEPTED: [&str; 6] = [
        "",
        "text/html",
        "text/plain",
        "application/xhtml+xml",
        "application/xml",
        "text/xml",
    ];

    let raw = match mime_type {
        Some(raw) => raw,
        // No declared content type: nothing to reject.
        None => return Ok(()),
    };

    // Drop parameters such as `; charset=utf-8` before comparing.
    let essence = raw
        .split(';')
        .next()
        .unwrap_or_default()
        .trim()
        .to_ascii_lowercase();

    if ACCEPTED.contains(&essence.as_str()) {
        Ok(())
    } else {
        Err(FetchError::UnsupportedContentType(raw.to_owned()))
    }
}
/// Wrapper that repeats calls to an inner fetcher or submitter when they fail
/// with a transient error.
#[derive(Debug, Clone)]
pub struct RetryingFetcher<F> {
/// Wrapped fetcher or submitter that performs the actual work.
inner: F,
/// Policy whose attempt budget bounds the number of calls.
policy: RetryPolicy,
}
impl<F> RetryingFetcher<F> {
#[must_use]
pub fn new(inner: F) -> Self {
Self {
inner,
policy: RetryPolicy::default(),
}
}
#[must_use]
pub fn with_policy(inner: F, policy: RetryPolicy) -> Self {
Self { inner, policy }
}
#[must_use]
pub fn policy(&self) -> &RetryPolicy {
&self.policy
}
}
impl<F: Fetcher> Fetcher for RetryingFetcher<F> {
    /// Fetches the request, repeating the fetch while it fails with a
    /// transient error and the policy's attempt budget is not exhausted.
    ///
    /// Retries are issued back to back; the policy's backoff delays are not
    /// consulted here.
    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
        // Every attempt except the last may be followed by another one.
        let retries = self.policy.max_attempts().saturating_sub(1);
        for _ in 0..retries {
            match self.inner.fetch(request) {
                Err(error) if error.is_transient() => {}
                settled => return settled,
            }
        }
        // Final attempt: its outcome is returned whatever it is.
        self.inner.fetch(request)
    }
}
/// Wrapper that stores successful responses in a file cache and answers from
/// that cache when the inner fetcher fails with a transient error.
#[derive(Debug, Clone)]
pub struct CacheFallbackFetcher<F> {
/// Wrapped fetcher or submitter that performs the actual work.
inner: F,
/// Cache written on success and read on transient failure.
cache: FileCache,
}
impl<F> CacheFallbackFetcher<F> {
#[must_use]
pub fn new(inner: F, cache: FileCache) -> Self {
Self { inner, cache }
}
#[must_use]
pub fn cache(&self) -> &FileCache {
&self.cache
}
}
impl<F: Fetcher> Fetcher for CacheFallbackFetcher<F> {
    /// Fetches the request; on a transient failure, answers from the cache
    /// entry for the requested URL instead.
    ///
    /// Successful responses are written to the cache on a best-effort basis
    /// (a failed write is ignored).
    ///
    /// # Errors
    /// * The original error when it is not transient, or when it is transient
    ///   and the cache has no entry.
    /// * [`FetchError::Cache`] when the cache lookup itself fails.
    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
        let error = match self.inner.fetch(request) {
            Ok(response) => {
                let _ = self.cache.store(&response);
                return Ok(response);
            }
            Err(error) => error,
        };
        if !error.is_transient() {
            return Err(error);
        }
        self.cache
            .load(&request.url)
            .map_err(|cache_error| FetchError::Cache(cache_error.to_string()))?
            .ok_or(error)
    }
}
/// In-memory fetcher that serves pre-registered results.
#[derive(Debug, Clone, Default)]
pub struct MemoryFetcher {
/// Results for plain fetches (and GET form submissions), keyed by URL text.
responses: BTreeMap<String, Result<Response, FetchError>>,
/// Results for non-GET form submissions, keyed by `form_submission_key`.
form_responses: BTreeMap<String, Result<Response, FetchError>>,
}
impl MemoryFetcher {
    /// Creates a fetcher with no registered results.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Registers a `text/html` response for `url` with no redirects.
    pub fn insert(&mut self, url: IndexUrl, body: impl Into<String>) {
        self.insert_redirect(url.clone(), url, Vec::new(), body);
    }

    /// Registers `response` as the result of fetching `requested_url`.
    pub fn insert_response(&mut self, requested_url: IndexUrl, response: Response) {
        let key = requested_url.as_str().to_owned();
        self.responses.insert(key, Ok(response));
    }

    /// Registers `error` as the result of fetching `url`.
    pub fn insert_error(&mut self, url: IndexUrl, error: FetchError) {
        let key = url.as_str().to_owned();
        self.responses.insert(key, Err(error));
    }

    /// Registers `response` as the result of the given form submission.
    pub fn insert_form_response(&mut self, submission: &FormSubmission, response: Response) {
        let key = form_submission_key(submission);
        self.form_responses.insert(key, Ok(response));
    }

    /// Registers `error` as the result of the given form submission.
    pub fn insert_form_error(&mut self, submission: &FormSubmission, error: FetchError) {
        let key = form_submission_key(submission);
        self.form_responses.insert(key, Err(error));
    }

    /// Registers a `text/html` response for `requested_url` that reports
    /// `final_url` and `redirects` as its redirect outcome.
    pub fn insert_redirect(
        &mut self,
        requested_url: IndexUrl,
        final_url: IndexUrl,
        redirects: Vec<IndexUrl>,
        body: impl Into<String>,
    ) {
        self.insert_response(
            requested_url,
            Response {
                final_url,
                redirects,
                mime_type: Some("text/html".to_owned()),
                body: body.into(),
            },
        );
    }
}
impl Fetcher for MemoryFetcher {
    /// Returns a clone of the result registered for the request URL.
    ///
    /// # Errors
    /// Returns [`FetchError::NotFound`] with the URL text when nothing is
    /// registered, or the registered error itself.
    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
        let key = request.url.as_str();
        self.responses
            .get(key)
            .cloned()
            .unwrap_or_else(|| Err(FetchError::NotFound(key.to_owned())))
    }
}
impl FormSubmitter for MemoryFetcher {
    /// Resolves a form submission against the registered results.
    ///
    /// GET submissions are served like a plain fetch of the action URL; other
    /// submissions are looked up by `form_submission_key`.
    ///
    /// # Errors
    /// Returns [`FetchError::NotFound`] (with the URL or the submission key)
    /// when nothing is registered, or the registered error itself.
    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
        match submission.method {
            FormMethod::Get => self.fetch(&Request {
                url: submission.action.clone(),
            }),
            FormMethod::Post => {
                let key = form_submission_key(submission);
                match self.form_responses.get(&key) {
                    Some(registered) => registered.clone(),
                    None => Err(FetchError::NotFound(key)),
                }
            }
        }
    }
}
/// Builds the lookup key for a form submission: method, action URL and body
/// (empty when absent), separated by tab characters.
fn form_submission_key(submission: &FormSubmission) -> String {
    let method = submission.method.as_str();
    let action = &submission.action;
    let body = submission.body.as_deref().unwrap_or_default();
    format!("{}\t{}\t{}", method, action, body)
}
/// File-backed response cache rooted at a directory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileCache {
/// Directory that holds the cache files.
root: PathBuf,
}
impl FileCache {
#[must_use]
pub fn new(root: impl Into<PathBuf>) -> Self {
Self { root: root.into() }
}
#[must_use]
pub fn root(&self) -> &Path {
&self.root
}
#[must_use]
pub fn path_for(&self, url: &IndexUrl) -> PathBuf {
self.root.join(format!("{}.cache", url.cache_key()))
}
pub fn store(&self, response: &Response) -> Result<PathBuf, CacheError> {
fs::create_dir_all(&self.root).map_err(CacheError::from)?;
let path = self.path_for(&response.final_url);
fs::write(&path, serialize_response(response)).map_err(CacheError::from)?;
Ok(path)
}
pub fn load(&self, url: &IndexUrl) -> Result<Option<Response>, CacheError> {
let path = self.path_for(url);
if !path.exists() {
return Ok(None);
}
let contents = fs::read_to_string(path).map_err(CacheError::from)?;
deserialize_response(&contents).map(Some)
}
}
/// Renders a response in the `index-cache-v1` text format.
///
/// Layout: a version line, one `final-url` record, one `redirect` record per
/// hop, one `mime` record (empty when no content type is set), a blank line,
/// then the body verbatim. Record values are escaped with `escape_field`.
fn serialize_response(response: &Response) -> String {
    let mut out = String::from("index-cache-v1");
    out.push('\n');

    out.push_str("final-url\t");
    out.push_str(&escape_field(response.final_url.as_str()));
    out.push('\n');

    for hop in &response.redirects {
        out.push_str("redirect\t");
        out.push_str(&escape_field(hop.as_str()));
        out.push('\n');
    }

    out.push_str("mime\t");
    if let Some(mime) = response.mime_type.as_deref() {
        out.push_str(&escape_field(mime));
    }
    out.push('\n');

    // Blank line separating the header records from the body.
    out.push('\n');
    out.push_str(&response.body);
    out
}
/// Parses the `index-cache-v1` text format back into a [`Response`].
///
/// Everything before the first blank line is the header; everything after it
/// is the body, taken verbatim. Each header record must be a known key and a
/// value separated by exactly one tab. An empty `mime` value means no content
/// type.
///
/// # Errors
/// * [`CacheError::Parse`] for a missing separator, wrong version line,
///   malformed record, bad escape sequence, or missing `final-url` record.
/// * Whatever `parse_cache_url` reports for a stored URL.
fn deserialize_response(contents: &str) -> Result<Response, CacheError> {
    let invalid = |reason: &str| CacheError::Parse(reason.to_owned());

    let (header, body) = contents
        .split_once("\n\n")
        .ok_or_else(|| invalid("cache body separator is missing"))?;

    let mut records = header.lines();
    if records.next() != Some("index-cache-v1") {
        return Err(invalid("missing cache header"));
    }

    let mut final_url = None;
    let mut redirects = Vec::new();
    let mut mime_type = None;
    for record in records {
        let fields: Vec<&str> = record.split('\t').collect();
        match fields.as_slice() {
            ["final-url", value] => final_url = Some(parse_cache_url(value)?),
            ["redirect", value] => redirects.push(parse_cache_url(value)?),
            ["mime", value] => {
                let mime = unescape_field(value).map_err(CacheError::Parse)?;
                if !mime.is_empty() {
                    mime_type = Some(mime);
                }
            }
            _ => return Err(invalid("invalid cache record")),
        }
    }

    let final_url = final_url.ok_or_else(|| invalid("missing final URL"))?;
    Ok(Response {
        final_url,
        redirects,
        mime_type,
        body: body.to_owned(),
    })
}
/// Unescapes a stored URL field and parses it.
///
/// # Errors
/// * [`CacheError::Parse`] when the field contains a bad escape sequence.
/// * [`CacheError::Url`] when the unescaped text is not a valid URL.
fn parse_cache_url(input: &str) -> Result<IndexUrl, CacheError> {
    unescape_field(input)
        .map_err(CacheError::Parse)
        .and_then(|raw| IndexUrl::parse(raw).map_err(CacheError::Url))
}
/// Escapes a value for use inside a single tab-separated cache record.
///
/// Backslash, tab, line feed and carriage return become the two-character
/// sequences `\\`, `\t`, `\n` and `\r`; every other character is copied as is.
fn escape_field(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut out, ch| {
            match ch {
                '\\' => out.push_str("\\\\"),
                '\t' => out.push_str("\\t"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                plain => out.push(plain),
            }
            out
        })
}
/// Reverses `escape_field`.
///
/// # Errors
/// Returns a description of the problem when the input ends in a lone
/// backslash or contains an escape other than `\\`, `\t`, `\n` or `\r`.
fn unescape_field(input: &str) -> Result<String, String> {
    let mut out = String::with_capacity(input.len());
    let mut rest = input.chars();
    loop {
        let decoded = match rest.next() {
            None => return Ok(out),
            // A backslash always consumes the character that follows it.
            Some('\\') => match rest.next() {
                Some('\\') => '\\',
                Some('t') => '\t',
                Some('n') => '\n',
                Some('r') => '\r',
                Some(other) => return Err(format!("unknown escape: {other}")),
                None => return Err("dangling escape".to_owned()),
            },
            Some(plain) => plain,
        };
        out.push(decoded);
    }
}
#[cfg(test)]
mod tests {
use std::cell::RefCell;
use std::collections::VecDeque;
use std::time::{SystemTime, UNIX_EPOCH};
use index_core::{Form, FormSubmission, IndexUrl, Input};
use super::{
CacheFallbackFetcher, FetchError, Fetcher, FileCache, FormSubmitter, MemoryFetcher,
Request, Response, RetryPolicy, RetryingFetcher, SecureFetcher, UreqFetcher,
form_submission_key, unescape_field, validate_text_mime,
};
use index_security::{ContentLimits, SecurityError};
#[test]
fn memory_fetcher_returns_registered_response() -> Result<(), Box<dyn std::error::Error>> {
let url = IndexUrl::parse("https://example.com")?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert(url.clone(), "<title>Hello</title>");
let response = fetcher.fetch(&Request { url });
assert!(response.is_ok());
assert_eq!(
response.map(|r| r.body),
Ok("<title>Hello</title>".to_owned())
);
Ok(())
}
#[test]
fn memory_fetcher_returns_not_found_for_unknown_url() -> Result<(), Box<dyn std::error::Error>>
{
let known = IndexUrl::parse("https://example.com")?;
let unknown = IndexUrl::parse("https://example.com/missing")?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert(known, "<title>Hello</title>");
let response = fetcher.fetch(&Request { url: unknown });
assert_eq!(
response,
Err(FetchError::NotFound(
"https://example.com/missing".to_owned()
))
);
Ok(())
}
#[test]
fn fetch_error_messages_are_actionable() {
let not_found = FetchError::NotFound("https://example.com/nope".to_owned()).to_string();
assert!(not_found.contains("https://example.com/nope"));
let network = FetchError::NetworkNotImplemented.to_string();
assert!(network.contains("not implemented"));
let network = FetchError::Network("connection refused".to_owned()).to_string();
assert!(network.contains("connection refused"));
let timeout = FetchError::Timeout { timeout_ms: 2500 }.to_string();
assert!(timeout.contains("2500ms"));
let status = FetchError::HttpStatus {
status: 503,
url: "https://example.com".to_owned(),
}
.to_string();
assert!(status.contains("503"));
let mime = FetchError::UnsupportedContentType("image/png".to_owned()).to_string();
assert!(mime.contains("image/png"));
let cache = FetchError::Cache("bad record".to_owned()).to_string();
assert!(cache.contains("bad record"));
let url = FetchError::Url(index_core::UrlError::MissingScheme).to_string();
assert!(url.contains("missing a scheme"));
let security = FetchError::Security(SecurityError::ContentTooLarge {
actual_bytes: 5,
limit_bytes: 4,
})
.to_string();
assert!(security.contains("security policy"));
}
#[test]
fn fetch_errors_classify_transient_failures() {
assert!(FetchError::Network("dns lookup failed".to_owned()).is_transient());
assert!(FetchError::Timeout { timeout_ms: 1000 }.is_transient());
assert!(
!FetchError::HttpStatus {
status: 404,
url: "https://example.com".to_owned()
}
.is_transient()
);
assert!(!FetchError::UnsupportedContentType("image/png".to_owned()).is_transient());
}
#[test]
fn memory_fetcher_tracks_redirects() -> Result<(), Box<dyn std::error::Error>> {
let requested = IndexUrl::parse("http://example.com")?;
let hop = IndexUrl::parse("https://example.com")?;
let final_url = IndexUrl::parse("https://www.example.com")?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_redirect(
requested.clone(),
final_url.clone(),
vec![hop.clone()],
"<title>Moved</title>",
);
let response = fetcher.fetch(&Request { url: requested })?;
assert_eq!(response.final_url, final_url);
assert_eq!(response.redirects, vec![hop]);
Ok(())
}
#[test]
fn memory_fetcher_submits_get_forms_via_resolved_action()
-> Result<(), Box<dyn std::error::Error>> {
let action = IndexUrl::parse("https://example.com/search")?;
let form = Form {
name: "search".to_owned(),
method: "GET".to_owned(),
action: action.as_str().to_owned(),
inputs: vec![Input {
name: "q".to_owned(),
kind: "search".to_owned(),
value: Some("index".to_owned()),
required: true,
}],
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert(submission.action.clone(), "<title>Search results</title>");
let response = fetcher.submit_form(&submission)?;
assert_eq!(response.final_url, submission.action);
assert!(response.body.contains("Search results"));
Ok(())
}
#[test]
fn memory_fetcher_submits_post_forms_by_method_url_and_body()
-> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "login".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/login".to_owned(),
inputs: vec![Input {
name: "user".to_owned(),
kind: "text".to_owned(),
value: Some("index".to_owned()),
required: true,
}],
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_form_response(
&submission,
Response {
final_url: submission.action.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Logged in</title>".to_owned(),
},
);
let response = fetcher.submit_form(&submission)?;
assert_eq!(response.body, "<title>Logged in</title>");
Ok(())
}
#[test]
fn memory_fetcher_reports_missing_post_form_submission()
-> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "login".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/login".to_owned(),
inputs: vec![Input {
name: "user".to_owned(),
kind: "text".to_owned(),
value: Some("index".to_owned()),
required: true,
}],
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let fetcher = MemoryFetcher::new();
assert_eq!(
fetcher.submit_form(&submission),
Err(FetchError::NotFound(
"POST\thttps://example.com/login\tuser=index".to_owned()
))
);
Ok(())
}
#[test]
fn memory_fetcher_returns_registered_form_error() -> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "login".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/login".to_owned(),
inputs: vec![Input {
name: "user".to_owned(),
kind: "text".to_owned(),
value: Some("index".to_owned()),
required: true,
}],
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 750 });
assert_eq!(
fetcher.submit_form(&submission),
Err(FetchError::Timeout { timeout_ms: 750 })
);
Ok(())
}
#[test]
fn ureq_fetcher_can_be_constructed_without_network_io() {
let _fetcher = UreqFetcher::new();
}
#[test]
fn defaults_and_accessors_are_stable() {
let policy = RetryPolicy::default();
assert_eq!(policy.max_attempts(), 3);
assert_eq!(policy.backoff_millis(), vec![0, 100, 250]);
let clamped = RetryPolicy::new(0, [7_u64]);
assert_eq!(clamped.max_attempts(), 1);
assert_eq!(clamped.backoff_millis(), vec![7]);
let fetcher = MemoryFetcher::new();
let secure = SecureFetcher::new(fetcher);
let _inner: &MemoryFetcher = secure.inner();
let retrying = RetryingFetcher::new(MemoryFetcher::new());
assert_eq!(retrying.policy().max_attempts(), 3);
let _default_fetcher = UreqFetcher::default();
}
#[test]
fn validate_text_mime_accepts_none_and_rejects_unknown_types() {
assert_eq!(validate_text_mime(None), Ok(()));
assert_eq!(validate_text_mime(Some("text/html; charset=utf-8")), Ok(()));
assert_eq!(
validate_text_mime(Some("image/png")),
Err(FetchError::UnsupportedContentType("image/png".to_owned()))
);
}
#[test]
fn secure_fetcher_rejects_large_response() -> Result<(), Box<dyn std::error::Error>> {
let url = IndexUrl::parse("https://example.com/large")?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert(url.clone(), "12345");
let secure = SecureFetcher::with_limits(fetcher, ContentLimits::new(4, 100, 20, 10));
assert_eq!(
secure.fetch(&Request { url }),
Err(FetchError::Security(SecurityError::ContentTooLarge {
actual_bytes: 5,
limit_bytes: 4
}))
);
Ok(())
}
#[test]
fn secure_fetcher_rejects_unsupported_content_type() -> Result<(), Box<dyn std::error::Error>> {
let url = IndexUrl::parse("https://example.com/image")?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_response(
url.clone(),
Response {
final_url: url.clone(),
redirects: Vec::new(),
mime_type: Some("image/png".to_owned()),
body: "not really text".to_owned(),
},
);
let secure = SecureFetcher::new(fetcher);
assert_eq!(
secure.fetch(&Request { url }),
Err(FetchError::UnsupportedContentType("image/png".to_owned()))
);
Ok(())
}
#[test]
fn secure_fetcher_validates_form_submission_responses() -> Result<(), Box<dyn std::error::Error>>
{
let form = Form {
name: "comment".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/comment".to_owned(),
inputs: vec![Input {
name: "body".to_owned(),
kind: "text".to_owned(),
value: Some("hello".to_owned()),
required: true,
}],
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_form_response(
&submission,
Response {
final_url: submission.action.clone(),
redirects: Vec::new(),
mime_type: Some("text/plain; charset=utf-8".to_owned()),
body: "accepted".to_owned(),
},
);
let secure = SecureFetcher::new(fetcher);
let response = secure.submit_form(&submission)?;
assert_eq!(response.body, "accepted");
Ok(())
}
#[test]
fn secure_fetcher_rejects_unsupported_form_response_type()
-> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "upload".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/upload".to_owned(),
inputs: Vec::new(),
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_form_response(
&submission,
Response {
final_url: submission.action.clone(),
redirects: Vec::new(),
mime_type: Some("image/png".to_owned()),
body: "not text".to_owned(),
},
);
let secure = SecureFetcher::new(fetcher);
assert_eq!(
secure.submit_form(&submission),
Err(FetchError::UnsupportedContentType("image/png".to_owned()))
);
Ok(())
}
#[test]
fn secure_fetcher_rejects_large_form_response() -> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "preview".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/preview".to_owned(),
inputs: Vec::new(),
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_form_response(
&submission,
Response {
final_url: submission.action.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "12345".to_owned(),
},
);
let secure = SecureFetcher::with_limits(fetcher, ContentLimits::new(4, 100, 20, 10));
assert_eq!(
secure.submit_form(&submission),
Err(FetchError::Security(SecurityError::ContentTooLarge {
actual_bytes: 5,
limit_bytes: 4
}))
);
Ok(())
}
#[derive(Debug)]
struct SequenceFetcher {
responses: RefCell<VecDeque<Result<Response, FetchError>>>,
}
impl SequenceFetcher {
fn new(responses: impl IntoIterator<Item = Result<Response, FetchError>>) -> Self {
Self {
responses: RefCell::new(responses.into_iter().collect()),
}
}
}
impl Fetcher for SequenceFetcher {
fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
self.responses
.borrow_mut()
.pop_front()
.unwrap_or_else(|| Err(FetchError::NotFound(request.url.as_str().to_owned())))
}
}
#[derive(Debug)]
struct SequenceSubmitter {
responses: RefCell<VecDeque<Result<Response, FetchError>>>,
}
impl SequenceSubmitter {
fn new(responses: impl IntoIterator<Item = Result<Response, FetchError>>) -> Self {
Self {
responses: RefCell::new(responses.into_iter().collect()),
}
}
}
impl FormSubmitter for SequenceSubmitter {
fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
self.responses
.borrow_mut()
.pop_front()
.unwrap_or_else(|| Err(FetchError::NotFound(form_submission_key(submission))))
}
}
#[test]
fn retrying_fetcher_retries_transient_failures_without_sleeping()
-> Result<(), Box<dyn std::error::Error>> {
let url = IndexUrl::parse("https://example.com/retry")?;
let response = Response {
final_url: url.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Retried</title>".to_owned(),
};
let fetcher = SequenceFetcher::new([
Err(FetchError::Timeout { timeout_ms: 1000 }),
Ok(response.clone()),
]);
let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
assert_eq!(retrying.policy().backoff_millis(), vec![0, 10]);
assert_eq!(retrying.fetch(&Request { url })?, response);
Ok(())
}
#[test]
fn retrying_fetcher_retries_form_submissions() -> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "post".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/post".to_owned(),
inputs: Vec::new(),
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let response = Response {
final_url: submission.action.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Submitted</title>".to_owned(),
};
let fetcher = SequenceSubmitter::new([
Err(FetchError::Timeout { timeout_ms: 1000 }),
Ok(response.clone()),
]);
let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
assert_eq!(retrying.submit_form(&submission)?, response);
Ok(())
}
#[test]
fn retrying_form_submitter_does_not_retry_non_transient_failures()
-> Result<(), Box<dyn std::error::Error>> {
let form = Form {
name: "post".to_owned(),
method: "POST".to_owned(),
action: "https://example.com/post".to_owned(),
inputs: Vec::new(),
buttons: Vec::new(),
};
let submission = form.submit(None, &[])?;
let submitter = SequenceSubmitter::new([
Err(FetchError::HttpStatus {
status: 401,
url: submission.action.as_str().to_owned(),
}),
Ok(Response {
final_url: submission.action.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Should not happen</title>".to_owned(),
}),
]);
let retrying = RetryingFetcher::with_policy(submitter, RetryPolicy::new(2, [0, 10]));
assert_eq!(
retrying.submit_form(&submission),
Err(FetchError::HttpStatus {
status: 401,
url: submission.action.as_str().to_owned(),
})
);
Ok(())
}
#[test]
fn retrying_fetcher_does_not_retry_non_transient_failures()
-> Result<(), Box<dyn std::error::Error>> {
let url = IndexUrl::parse("https://example.com/missing")?;
let response = Response {
final_url: url.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Should not happen</title>".to_owned(),
};
let fetcher = SequenceFetcher::new([
Err(FetchError::HttpStatus {
status: 404,
url: url.as_str().to_owned(),
}),
Ok(response),
]);
let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
assert_eq!(
retrying.fetch(&Request { url }),
Err(FetchError::HttpStatus {
status: 404,
url: "https://example.com/missing".to_owned(),
})
);
Ok(())
}
#[test]
fn cache_fallback_reuses_cached_response_after_transient_failure()
-> Result<(), Box<dyn std::error::Error>> {
let root = temp_path("cache-fallback");
let cache = FileCache::new(&root);
let url = IndexUrl::parse("https://example.com/cached")?;
let cached = Response {
final_url: url.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Cached</title>".to_owned(),
};
let path = cache.store(&cached)?;
let mut fetcher = MemoryFetcher::new();
fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());
assert_eq!(fallback.fetch(&Request { url })?, cached);
std::fs::remove_file(path)?;
std::fs::remove_dir(&root)?;
Ok(())
}
#[test]
fn cache_fallback_keeps_successes_and_non_transient_failures()
-> Result<(), Box<dyn std::error::Error>> {
let root = temp_path("cache-success");
let cache = FileCache::new(&root);
let url = IndexUrl::parse("https://example.com/live")?;
let response = Response {
final_url: url.clone(),
redirects: Vec::new(),
mime_type: Some("text/html".to_owned()),
body: "<title>Live</title>".to_owned(),
};
let mut fetcher = MemoryFetcher::new();
fetcher.insert_response(url.clone(), response.clone());
let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());
assert!(fallback.cache().path_for(&url).starts_with(cache.root()));
assert_eq!(fallback.fetch(&Request { url: url.clone() })?, response);
assert_eq!(
cache.load(&url)?.map(|cached| cached.body),
Some("<title>Live</title>".to_owned())
);
let mut missing = MemoryFetcher::new();
missing.insert_error(
url.clone(),
FetchError::HttpStatus {
status: 404,
url: url.as_str().to_owned(),
},
);
let fallback = CacheFallbackFetcher::new(missing, cache.clone());
assert_eq!(
fallback.fetch(&Request { url: url.clone() }),
Err(FetchError::HttpStatus {
status: 404,
url: url.as_str().to_owned()
})
);
if let Some(path) = cache.load(&url)?.map(|_| cache.path_for(&url)) {
std::fs::remove_file(path)?;
}
std::fs::remove_dir(&root)?;
Ok(())
}
/// Checks that `submit_form` on a `CacheFallbackFetcher` returns a previously
/// stored response (whose final URL is the submission's action) when the
/// wrapped fetcher reports a timeout for that action URL.
///
/// The outcome is captured before the scratch files are deleted and asserted
/// afterwards, so a failing assertion does not leave the cache file and its
/// directory behind in the temp directory.
#[test]
fn cache_fallback_reuses_cached_form_response_after_transient_failure()
-> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-form-fallback");
    let cache = FileCache::new(&root);
    let form = Form {
        name: "search".to_owned(),
        method: "GET".to_owned(),
        action: "https://example.com/search?q=index".to_owned(),
        inputs: Vec::new(),
        buttons: Vec::new(),
    };
    let submission = form.submit(None, &[])?;
    let cached = Response {
        final_url: submission.action.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Cached search</title>".to_owned(),
    };
    let path = cache.store(&cached)?;
    let mut fetcher = MemoryFetcher::new();
    fetcher.insert_error(
        submission.action.clone(),
        FetchError::Timeout { timeout_ms: 500 },
    );
    let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());

    // Run the code under test, then clean up, then assert: the files above
    // were created by this test itself, so removal does not depend on the
    // outcome being checked.
    let outcome = fallback.submit_form(&submission);
    std::fs::remove_file(path)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(outcome?, cached);
    Ok(())
}
/// With nothing stored in the cache, a timeout from the wrapped fetcher must
/// come back from `CacheFallbackFetcher::fetch` unchanged.
#[test]
fn cache_fallback_returns_transient_error_when_cache_is_missing()
-> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-miss-fallback");
    let empty_cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/miss")?;
    let timeout = FetchError::Timeout { timeout_ms: 500 };

    let mut failing = MemoryFetcher::new();
    failing.insert_error(url.clone(), timeout.clone());
    let fallback = CacheFallbackFetcher::new(failing, empty_cache);

    let outcome = fallback.fetch(&Request { url });
    assert_eq!(outcome, Err(timeout));
    Ok(())
}
/// Checks that when the wrapped fetcher times out and the cache file for the
/// URL holds unparsable content, `fetch` returns a `FetchError::Cache` whose
/// message contains "cache data is invalid".
///
/// The fetch result is captured first and asserted only after the scratch
/// files are removed, so a failing assertion does not leak the file and
/// directory this test wrote into the temp directory.
#[test]
fn cache_fallback_reports_cache_parse_errors() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-parse-error");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/parse")?;
    // Plant a corrupt entry at exactly the path the cache would read for `url`.
    std::fs::create_dir_all(cache.root())?;
    std::fs::write(cache.path_for(&url), "broken-cache-content")?;
    let mut fetcher = MemoryFetcher::new();
    fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
    let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());

    let result = fallback.fetch(&Request { url: url.clone() });

    // Clean up before asserting; both paths were created above by this test.
    std::fs::remove_file(cache.path_for(&url))?;
    std::fs::remove_dir(cache.root())?;

    assert!(
        matches!(result, Err(FetchError::Cache(error)) if error.contains("cache data is invalid"))
    );
    Ok(())
}
/// Covers two failure paths of `submit_form` on `CacheFallbackFetcher` after
/// the wrapped fetcher times out: with nothing written to the cache the
/// timeout itself is returned, and with an unparsable cache file a
/// `FetchError::Cache` mentioning "cache data is invalid" is returned.
///
/// For the second case the outcome is captured before the scratch files are
/// deleted and asserted afterwards, so a failing assertion does not leave the
/// planted file and its directory behind in the temp directory.
#[test]
fn cache_fallback_form_submitter_reports_cache_errors_and_misses()
-> Result<(), Box<dyn std::error::Error>> {
    let form = Form {
        name: "search".to_owned(),
        method: "POST".to_owned(),
        action: "https://example.com/search".to_owned(),
        inputs: Vec::new(),
        buttons: Vec::new(),
    };
    let submission = form.submit(None, &[])?;

    // Case 1: cache miss. This test writes nothing under `miss_root`, so there
    // is nothing to clean up for this half.
    let miss_root = temp_path("cache-form-miss");
    let miss_cache = FileCache::new(&miss_root);
    let mut miss_fetcher = MemoryFetcher::new();
    miss_fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 250 });
    let miss_fallback = CacheFallbackFetcher::new(miss_fetcher, miss_cache);
    assert_eq!(
        miss_fallback.submit_form(&submission),
        Err(FetchError::Timeout { timeout_ms: 250 })
    );

    // Case 2: a corrupt entry planted at the path used for the action URL.
    let parse_root = temp_path("cache-form-parse");
    let parse_cache = FileCache::new(&parse_root);
    std::fs::create_dir_all(parse_cache.root())?;
    std::fs::write(
        parse_cache.path_for(&submission.action),
        "broken-cache-content",
    )?;
    let mut parse_fetcher = MemoryFetcher::new();
    parse_fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 250 });
    let parse_fallback = CacheFallbackFetcher::new(parse_fetcher, parse_cache.clone());

    let parse_result = parse_fallback.submit_form(&submission);

    // Clean up before asserting; both paths were created above by this test.
    std::fs::remove_file(parse_cache.path_for(&submission.action))?;
    std::fs::remove_dir(parse_cache.root())?;

    assert!(
        matches!(parse_result, Err(FetchError::Cache(error)) if error.contains("cache data is invalid"))
    );
    Ok(())
}
/// A response whose final URL also appears in its own redirect list must be
/// rejected by `SecureFetcher` with `SecurityError::RedirectLoop` for that URL.
#[test]
fn secure_fetcher_rejects_redirect_loop() -> Result<(), Box<dyn std::error::Error>> {
    let start = IndexUrl::parse("https://example.com/start")?;
    let hop = IndexUrl::parse("https://example.com/hop")?;

    // `hop` is both the final URL and the only entry in the redirect chain.
    let mut inner = MemoryFetcher::new();
    inner.insert_redirect(
        start.clone(),
        hop.clone(),
        vec![hop.clone()],
        "<title>Loop</title>",
    );

    let secure = SecureFetcher::new(inner);
    let outcome = secure.fetch(&Request { url: start });
    let loop_error = SecurityError::RedirectLoop { url: hop };
    assert_eq!(outcome, Err(FetchError::Security(loop_error)));
    Ok(())
}
/// Stores a response in a `FileCache` and checks that loading by its final URL
/// yields an equal response.
#[test]
fn file_cache_stores_and_loads_response() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/docs")?;
    let original = Response {
        final_url: url.clone(),
        redirects: vec![],
        mime_type: Some(String::from("text/html")),
        body: String::from("<title>Docs</title>"),
    };

    let entry_path = cache.store(&original)?;
    let loaded = cache.load(&url)?;

    // Scratch files are removed before the assertion runs.
    std::fs::remove_file(entry_path)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(loaded, Some(original));
    Ok(())
}
/// Loading a URL that was never stored yields `None`, and the path computed
/// for that URL lies under the cache root.
#[test]
fn file_cache_returns_none_for_missing_entry() -> Result<(), Box<dyn std::error::Error>> {
    let cache = FileCache::new(temp_path("cache-missing"));
    let url = IndexUrl::parse("https://example.com/missing")?;

    let lookup = cache.load(&url)?;
    assert_eq!(lookup, None);

    let entry_path = cache.path_for(&url);
    assert!(entry_path.starts_with(cache.root()));
    Ok(())
}
/// Round-trips a response that has no MIME type, one redirect entry, and a
/// body containing a newline, and expects it back unchanged.
#[test]
fn file_cache_preserves_empty_mime_and_redirects() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-redirect");
    let cache = FileCache::new(&root);
    let http_url = IndexUrl::parse("http://example.com")?;
    let https_url = IndexUrl::parse("https://example.com")?;
    let original = Response {
        final_url: https_url.clone(),
        redirects: vec![http_url],
        mime_type: None,
        body: String::from("Body\nwith newline"),
    };

    let entry_path = cache.store(&original)?;
    let loaded = cache.load(&https_url)?;

    // Scratch files are removed before the assertion runs.
    std::fs::remove_file(entry_path)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(loaded, Some(original));
    Ok(())
}
/// Round-trips a response whose MIME type contains a tab, a backslash, a
/// newline, and a carriage return, and expects it back unchanged.
#[test]
fn file_cache_escapes_header_fields() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-escaped");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/escaped")?;
    let awkward_mime = String::from("text/html\twith\\escapes\nand\rreturns");
    let original = Response {
        final_url: url.clone(),
        redirects: vec![],
        mime_type: Some(awkward_mime),
        body: String::from("Body"),
    };

    let entry_path = cache.store(&original)?;
    let loaded = cache.load(&url)?;

    // Scratch files are removed before the assertion runs.
    std::fs::remove_file(entry_path)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(loaded, Some(original));
    Ok(())
}
/// Feeds several malformed cache records to `deserialize_response` and expects
/// every one of them to be rejected.
#[test]
fn file_cache_rejects_invalid_data() {
    let rejects = |record: &str| super::deserialize_response(record).is_err();

    // Records whose first line is not `index-cache-v1`.
    assert!(rejects("bad"));
    assert!(rejects("bad\n\nbody"));
    // A header key other than the ones used elsewhere in these tests.
    assert!(rejects("index-cache-v1\nunknown\tvalue\n\nbody"));
    // A `final-url` value of `bad-url`.
    assert!(rejects("index-cache-v1\nfinal-url\tbad-url\n\nbody"));
    // Only a `mime` header, with no `final-url` line.
    assert!(rejects("index-cache-v1\nmime\ttext/html\n\nbody"));
    // A `final-url` value ending in a lone backslash.
    assert!(rejects(
        "index-cache-v1\nfinal-url\thttps://example.com\\\n\nbody"
    ));
}
/// A backslash followed by `x` must make `unescape_field` fail with the
/// message "unknown escape: x".
#[test]
fn unescape_field_rejects_unknown_escape_sequences() {
    let outcome = unescape_field("\\x");
    assert_eq!(outcome, Err(String::from("unknown escape: x")));
}
/// Each `CacheError` variant's display text must include the detail that
/// identifies the underlying problem.
#[test]
fn cache_error_messages_are_actionable() {
    let io_error = super::CacheError::from(std::io::Error::other("disk full"));
    assert!(io_error.to_string().contains("disk full"));

    let parse_error = super::CacheError::Parse(String::from("bad record"));
    assert!(parse_error.to_string().contains("bad record"));

    let url_error = super::CacheError::Url(index_core::UrlError::MissingScheme);
    assert!(url_error.to_string().contains("missing a scheme"));
}
/// Two URLs that differ only in host casing, an explicit `:443` port, and the
/// fragment must map to the same cache path.
#[test]
fn file_cache_paths_use_normalized_url_keys() -> Result<(), Box<dyn std::error::Error>> {
    let cache = FileCache::new(temp_path("cache-key"));
    let verbose = IndexUrl::parse("https://EXAMPLE.com:443/docs#first")?;
    let plain = IndexUrl::parse("https://example.com/docs#second")?;

    let verbose_path = cache.path_for(&verbose);
    let plain_path = cache.path_for(&plain);
    assert_eq!(verbose_path, plain_path);
    Ok(())
}
/// Builds a scratch path named `index-http-{name}-{nanos}` inside the OS temp
/// directory, where `nanos` is the current time in nanoseconds since the Unix
/// epoch (or `0` if the clock reads earlier than the epoch).
///
/// Only the path is computed; nothing is created on disk.
fn temp_path(name: &str) -> std::path::PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    std::env::temp_dir().join(format!("index-http-{name}-{nanos}"))
}
}