Skip to main content

index_http/
lib.rs

1//! Fetch abstractions for Index.
2//!
3use std::collections::BTreeMap;
4use std::fmt::{Display, Formatter};
5use std::fs;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use index_core::{FormMethod, FormSubmission, IndexUrl, UrlError};
10use index_security::{ContentLimits, SecurityError, check_content_size, validate_redirect_chain};
11use ureq::ResponseExt;
12
/// Fetch request.
///
/// Describes a single document to retrieve. The only datum carried is the
/// target URL; the type has no method, header, or body fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Request {
    /// Target URL.
    pub url: IndexUrl,
}
19
/// Fetch response.
///
/// Produced by every [`Fetcher`] and [`FormSubmitter`] in this module and
/// also the unit that [`FileCache`] persists and restores.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Response {
    /// Final URL after redirects.
    pub final_url: IndexUrl,
    /// Redirect hops observed before the final URL.
    pub redirects: Vec<IndexUrl>,
    /// MIME type when known.
    ///
    /// `None` means no content type was available; the value is stored as
    /// received (parameters such as `; charset=…` are not stripped here).
    pub mime_type: Option<String>,
    /// Response body as UTF-8 text for the initial prototype.
    pub body: String,
}
32
/// Fetch errors.
///
/// Only [`FetchError::Network`] and [`FetchError::Timeout`] are classified as
/// transient by [`FetchError::is_transient`]; every other variant is treated
/// as a definitive failure by the retry and cache-fallback wrappers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FetchError {
    /// No response is registered for the requested URL.
    ///
    /// The payload is the lookup key that missed (a URL, or a form
    /// submission key for non-GET form submissions).
    NotFound(String),
    /// Real network IO is unavailable for this fetcher.
    NetworkNotImplemented,
    /// Network IO failed.
    ///
    /// The payload is the transport error rendered as text.
    Network(String),
    /// The network operation timed out.
    Timeout {
        /// Timeout budget in milliseconds.
        timeout_ms: u64,
    },
    /// The server returned a non-success HTTP status.
    HttpStatus {
        /// HTTP status code.
        status: u16,
        /// URL associated with the status.
        url: String,
    },
    /// The response MIME type is not supported for text transformation.
    ///
    /// The payload is the content type exactly as it was received.
    UnsupportedContentType(String),
    /// A URL returned by the transport was invalid or unsafe.
    Url(UrlError),
    /// Cache lookup failed while handling a network fallback.
    ///
    /// The payload is the cache error rendered as text.
    Cache(String),
    /// A security policy rejected the response.
    Security(SecurityError),
}
63
64impl Display for FetchError {
65    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
66        match self {
67            Self::NotFound(url) => write!(f, "no fixture response registered for {url}"),
68            Self::NetworkNotImplemented => f.write_str("network fetching is not implemented yet"),
69            Self::Network(error) => write!(f, "network fetch failed: {error}"),
70            Self::Timeout { timeout_ms } => {
71                write!(f, "network fetch timed out after {timeout_ms}ms")
72            }
73            Self::HttpStatus { status, url } => {
74                write!(f, "HTTP status {status} returned for {url}")
75            }
76            Self::UnsupportedContentType(mime_type) => {
77                write!(f, "unsupported response content type: {mime_type}")
78            }
79            Self::Url(error) => write!(f, "network response URL is invalid: {error}"),
80            Self::Cache(error) => write!(f, "cache fallback failed: {error}"),
81            Self::Security(error) => write!(f, "fetch security policy rejected response: {error}"),
82        }
83    }
84}
85
// Marker impl: the default `source()` is used, so no error chain is exposed.
// Wrapped errors are instead embedded in the `Display` text of each variant.
impl std::error::Error for FetchError {}
87
88impl FetchError {
89    /// Returns whether retrying or falling back to cache is reasonable.
90    #[must_use]
91    pub fn is_transient(&self) -> bool {
92        matches!(self, Self::Network(_) | Self::Timeout { .. })
93    }
94}
95
/// Deterministic retry policy: an attempt budget plus a fixed backoff schedule.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RetryPolicy {
    max_attempts: u8,
    backoff: Vec<Duration>,
}

impl RetryPolicy {
    /// Builds a policy from an attempt budget and a backoff schedule in milliseconds.
    ///
    /// A budget of zero is raised to one so that at least one attempt is always made.
    #[must_use]
    pub fn new(max_attempts: u8, backoff_millis: impl IntoIterator<Item = u64>) -> Self {
        let backoff: Vec<Duration> = backoff_millis
            .into_iter()
            .map(Duration::from_millis)
            .collect();
        let max_attempts = if max_attempts == 0 { 1 } else { max_attempts };
        Self {
            max_attempts,
            backoff,
        }
    }

    /// Returns how many attempts the policy allows in total.
    #[must_use]
    pub const fn max_attempts(&self) -> u8 {
        self.max_attempts
    }

    /// Returns the backoff schedule as whole milliseconds, in schedule order.
    ///
    /// Values that do not fit in a `u64` saturate at `u64::MAX`.
    #[must_use]
    pub fn backoff_millis(&self) -> Vec<u64> {
        let mut millis = Vec::with_capacity(self.backoff.len());
        for delay in &self.backoff {
            millis.push(u64::try_from(delay.as_millis()).unwrap_or(u64::MAX));
        }
        millis
    }
}

impl Default for RetryPolicy {
    /// Three attempts with a `0 ms, 100 ms, 250 ms` backoff schedule.
    fn default() -> Self {
        Self {
            max_attempts: 3,
            backoff: [0_u64, 100, 250]
                .into_iter()
                .map(Duration::from_millis)
                .collect(),
        }
    }
}
137
/// Cache persistence errors.
///
/// Returned by [`FileCache`] when writing an entry or when reading and
/// decoding a stored one.
#[derive(Debug)]
pub enum CacheError {
    /// Filesystem error.
    Io(std::io::Error),
    /// Cached response data was invalid.
    ///
    /// The payload is a short description of what was wrong with the record.
    Parse(String),
    /// Cached URL was invalid.
    Url(index_core::UrlError),
}
148
149impl From<std::io::Error> for CacheError {
150    fn from(value: std::io::Error) -> Self {
151        Self::Io(value)
152    }
153}
154
155impl Display for CacheError {
156    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
157        match self {
158            Self::Io(error) => write!(f, "cache IO failed: {error}"),
159            Self::Parse(reason) => write!(f, "cache data is invalid: {reason}"),
160            Self::Url(error) => write!(f, "cache URL is invalid: {error}"),
161        }
162    }
163}
164
// Marker impl: the default `source()` is used; the wrapped IO/URL error is
// already included in the `Display` text instead of being exposed as a chain.
impl std::error::Error for CacheError {}
166
/// Fetcher trait.
///
/// Abstraction over "retrieve the document at a URL". Implemented in this
/// module by the real transport, the in-memory fixture fetcher, and the
/// security, retry, and cache-fallback wrappers.
pub trait Fetcher {
    /// Fetches a request.
    ///
    /// # Errors
    ///
    /// Returns a [`FetchError`] describing why no response could be produced.
    fn fetch(&self, request: &Request) -> Result<Response, FetchError>;
}
172
/// Form submission transport trait.
///
/// Counterpart of [`Fetcher`] for form submissions whose method, action URL,
/// and body have already been resolved into a [`FormSubmission`].
pub trait FormSubmitter {
    /// Submits a resolved semantic form request.
    ///
    /// # Errors
    ///
    /// Returns a [`FetchError`] describing why no response could be produced.
    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError>;
}
178
179/// Blocking HTTP fetcher backed by `ureq`.
180#[derive(Debug, Clone)]
181pub struct UreqFetcher {
182    agent: ureq::Agent,
183}
184
185impl UreqFetcher {
186    /// Creates a fetcher with conservative defaults.
187    #[must_use]
188    pub fn new() -> Self {
189        let agent: ureq::Agent = ureq::Agent::config_builder()
190            .save_redirect_history(true)
191            .timeout_global(Some(Duration::from_secs(30)))
192            .user_agent("Index/0.1.0")
193            .build()
194            .into();
195        Self { agent }
196    }
197}
198
199impl Default for UreqFetcher {
200    fn default() -> Self {
201        Self::new()
202    }
203}
204
205impl Fetcher for UreqFetcher {
206    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
207        let mut response = self
208            .agent
209            .get(request.url.as_str())
210            .call()
211            .map_err(|error| FetchError::Network(error.to_string()))?;
212        response_from_ureq(request.url.as_str(), &mut response)
213    }
214}
215
216impl FormSubmitter for UreqFetcher {
217    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
218        match submission.method {
219            FormMethod::Get => self.fetch(&Request {
220                url: submission.action.clone(),
221            }),
222            FormMethod::Post => {
223                let body = submission.body.as_deref().unwrap_or_default();
224                let mut response = self
225                    .agent
226                    .post(submission.action.as_str())
227                    .header("content-type", "application/x-www-form-urlencoded")
228                    .send(body)
229                    .map_err(|error| FetchError::Network(error.to_string()))?;
230                response_from_ureq(submission.action.as_str(), &mut response)
231            }
232        }
233    }
234}
235
236fn response_from_ureq(
237    requested_url: &str,
238    response: &mut ureq::http::Response<ureq::Body>,
239) -> Result<Response, FetchError> {
240    let final_url = IndexUrl::parse(response.get_uri().to_string()).map_err(FetchError::Url)?;
241    let redirects = response
242        .get_redirect_history()
243        .unwrap_or(&[])
244        .iter()
245        .filter_map(|uri| {
246            let value = uri.to_string();
247            (value != requested_url && value != final_url.as_str()).then_some(value)
248        })
249        .map(IndexUrl::parse)
250        .collect::<Result<Vec<_>, _>>()
251        .map_err(FetchError::Url)?;
252    let mime_type = response
253        .headers()
254        .get("content-type")
255        .and_then(|value| value.to_str().ok())
256        .map(ToOwned::to_owned);
257    let body = response
258        .body_mut()
259        .read_to_string()
260        .map_err(|error| FetchError::Network(error.to_string()))?;
261
262    Ok(Response {
263        final_url,
264        redirects,
265        mime_type,
266        body,
267    })
268}
269
270/// Fetcher wrapper that enforces hostile-input security policy.
271#[derive(Debug, Clone)]
272pub struct SecureFetcher<F> {
273    inner: F,
274    limits: ContentLimits,
275}
276
277impl<F> SecureFetcher<F> {
278    /// Wraps a fetcher with default security limits.
279    #[must_use]
280    pub fn new(inner: F) -> Self {
281        Self {
282            inner,
283            limits: ContentLimits::default(),
284        }
285    }
286
287    /// Wraps a fetcher with explicit security limits.
288    #[must_use]
289    pub const fn with_limits(inner: F, limits: ContentLimits) -> Self {
290        Self { inner, limits }
291    }
292
293    /// Returns the wrapped fetcher.
294    #[must_use]
295    pub fn inner(&self) -> &F {
296        &self.inner
297    }
298}
299
300impl<F: Fetcher> Fetcher for SecureFetcher<F> {
301    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
302        let response = self.inner.fetch(request)?;
303        validate_response(&request.url, response, self.limits)
304    }
305}
306
307impl<F: FormSubmitter> FormSubmitter for SecureFetcher<F> {
308    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
309        let response = self.inner.submit_form(submission)?;
310        validate_response(&submission.action, response, self.limits)
311    }
312}
313
314fn validate_response(
315    requested_url: &IndexUrl,
316    response: Response,
317    limits: ContentLimits,
318) -> Result<Response, FetchError> {
319    validate_text_mime(response.mime_type.as_deref())?;
320    validate_redirect_chain(
321        requested_url,
322        &response.redirects,
323        &response.final_url,
324        limits,
325    )
326    .map_err(FetchError::Security)?;
327    check_content_size(&response.body, limits).map_err(FetchError::Security)?;
328    Ok(response)
329}
330
331impl<F: FormSubmitter> FormSubmitter for RetryingFetcher<F> {
332    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
333        let mut attempt = 1;
334        loop {
335            match self.inner.submit_form(submission) {
336                Ok(response) => return Ok(response),
337                Err(error) if error.is_transient() && attempt < self.policy.max_attempts() => {
338                    attempt += 1;
339                }
340                Err(error) => return Err(error),
341            }
342        }
343    }
344}
345
346impl<F: FormSubmitter> FormSubmitter for CacheFallbackFetcher<F> {
347    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
348        match self.inner.submit_form(submission) {
349            Ok(response) => {
350                let _ = self.cache.store(&response);
351                Ok(response)
352            }
353            Err(error) if error.is_transient() => match self.cache.load(&submission.action) {
354                Ok(Some(response)) => Ok(response),
355                Ok(None) => Err(error),
356                Err(cache_error) => Err(FetchError::Cache(cache_error.to_string())),
357            },
358            Err(error) => Err(error),
359        }
360    }
361}
362
363fn validate_text_mime(mime_type: Option<&str>) -> Result<(), FetchError> {
364    let Some(mime_type) = mime_type else {
365        return Ok(());
366    };
367    let normalized = mime_type
368        .split(';')
369        .next()
370        .map(str::trim)
371        .unwrap_or_default()
372        .to_ascii_lowercase();
373    match normalized.as_str() {
374        ""
375        | "text/html"
376        | "text/plain"
377        | "application/xhtml+xml"
378        | "application/xml"
379        | "text/xml" => Ok(()),
380        _ => Err(FetchError::UnsupportedContentType(mime_type.to_owned())),
381    }
382}
383
384/// Fetcher wrapper that retries transient failures without sleeping.
385#[derive(Debug, Clone)]
386pub struct RetryingFetcher<F> {
387    inner: F,
388    policy: RetryPolicy,
389}
390
391impl<F> RetryingFetcher<F> {
392    /// Wraps a fetcher with the default retry policy.
393    #[must_use]
394    pub fn new(inner: F) -> Self {
395        Self {
396            inner,
397            policy: RetryPolicy::default(),
398        }
399    }
400
401    /// Wraps a fetcher with an explicit retry policy.
402    #[must_use]
403    pub fn with_policy(inner: F, policy: RetryPolicy) -> Self {
404        Self { inner, policy }
405    }
406
407    /// Returns the configured retry policy.
408    #[must_use]
409    pub fn policy(&self) -> &RetryPolicy {
410        &self.policy
411    }
412}
413
414impl<F: Fetcher> Fetcher for RetryingFetcher<F> {
415    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
416        let mut attempt = 1;
417        loop {
418            match self.inner.fetch(request) {
419                Ok(response) => return Ok(response),
420                Err(error) if error.is_transient() && attempt < self.policy.max_attempts() => {
421                    attempt += 1;
422                }
423                Err(error) => return Err(error),
424            }
425        }
426    }
427}
428
/// Fetcher wrapper that reuses cached content after transient failures.
///
/// Successful responses from the inner transport are written to the
/// [`FileCache`]; when the inner transport reports a transient error the
/// cache is consulted instead.
#[derive(Debug, Clone)]
pub struct CacheFallbackFetcher<F> {
    // Transport that is always tried first.
    inner: F,
    // Store written on success and read on transient failure.
    cache: FileCache,
}

impl<F> CacheFallbackFetcher<F> {
    /// Creates a cache fallback fetcher.
    ///
    /// `inner` is the transport to try first; `cache` is where responses are
    /// stored and later looked up.
    #[must_use]
    pub fn new(inner: F, cache: FileCache) -> Self {
        Self { inner, cache }
    }

    /// Returns the cache used for fallback.
    #[must_use]
    pub fn cache(&self) -> &FileCache {
        &self.cache
    }
}
449
450impl<F: Fetcher> Fetcher for CacheFallbackFetcher<F> {
451    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
452        match self.inner.fetch(request) {
453            Ok(response) => {
454                let _ = self.cache.store(&response);
455                Ok(response)
456            }
457            Err(error) if error.is_transient() => match self.cache.load(&request.url) {
458                Ok(Some(response)) => Ok(response),
459                Ok(None) => Err(error),
460                Err(cache_error) => Err(FetchError::Cache(cache_error.to_string())),
461            },
462            Err(error) => Err(error),
463        }
464    }
465}
466
467/// In-memory fetcher useful for tests and fixtures.
468#[derive(Debug, Clone, Default)]
469pub struct MemoryFetcher {
470    responses: BTreeMap<String, Result<Response, FetchError>>,
471    form_responses: BTreeMap<String, Result<Response, FetchError>>,
472}
473
474impl MemoryFetcher {
475    /// Creates an empty memory fetcher.
476    #[must_use]
477    pub fn new() -> Self {
478        Self::default()
479    }
480
481    /// Registers a response body for a URL.
482    pub fn insert(&mut self, url: IndexUrl, body: impl Into<String>) {
483        let response = Response {
484            final_url: url.clone(),
485            redirects: Vec::new(),
486            mime_type: Some("text/html".to_owned()),
487            body: body.into(),
488        };
489        self.responses.insert(url.as_str().to_owned(), Ok(response));
490    }
491
492    /// Registers a complete response for a URL.
493    pub fn insert_response(&mut self, requested_url: IndexUrl, response: Response) {
494        self.responses
495            .insert(requested_url.as_str().to_owned(), Ok(response));
496    }
497
498    /// Registers a deterministic fetch error for a URL.
499    pub fn insert_error(&mut self, url: IndexUrl, error: FetchError) {
500        self.responses.insert(url.as_str().to_owned(), Err(error));
501    }
502
503    /// Registers a complete response for a resolved form submission.
504    pub fn insert_form_response(&mut self, submission: &FormSubmission, response: Response) {
505        self.form_responses
506            .insert(form_submission_key(submission), Ok(response));
507    }
508
509    /// Registers a deterministic form submission error.
510    pub fn insert_form_error(&mut self, submission: &FormSubmission, error: FetchError) {
511        self.form_responses
512            .insert(form_submission_key(submission), Err(error));
513    }
514
515    /// Registers a response that ended at a different URL.
516    pub fn insert_redirect(
517        &mut self,
518        requested_url: IndexUrl,
519        final_url: IndexUrl,
520        redirects: Vec<IndexUrl>,
521        body: impl Into<String>,
522    ) {
523        let response = Response {
524            final_url,
525            redirects,
526            mime_type: Some("text/html".to_owned()),
527            body: body.into(),
528        };
529        self.responses
530            .insert(requested_url.as_str().to_owned(), Ok(response));
531    }
532}
533
534impl Fetcher for MemoryFetcher {
535    fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
536        match self.responses.get(request.url.as_str()).cloned() {
537            Some(result) => result,
538            None => Err(FetchError::NotFound(request.url.as_str().to_owned())),
539        }
540    }
541}
542
543impl FormSubmitter for MemoryFetcher {
544    fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
545        if submission.method == FormMethod::Get {
546            return self.fetch(&Request {
547                url: submission.action.clone(),
548            });
549        }
550        match self
551            .form_responses
552            .get(&form_submission_key(submission))
553            .cloned()
554        {
555            Some(result) => result,
556            None => Err(FetchError::NotFound(form_submission_key(submission))),
557        }
558    }
559}
560
561fn form_submission_key(submission: &FormSubmission) -> String {
562    format!(
563        "{}\t{}\t{}",
564        submission.method.as_str(),
565        submission.action,
566        submission.body.as_deref().unwrap_or_default()
567    )
568}
569
/// Filesystem-backed response cache.
///
/// Each response is kept in its own file directly under the root directory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileCache {
    // Directory that holds the cache files.
    root: PathBuf,
}
575
576impl FileCache {
577    /// Creates a cache rooted at a directory.
578    #[must_use]
579    pub fn new(root: impl Into<PathBuf>) -> Self {
580        Self { root: root.into() }
581    }
582
583    /// Returns the root cache directory.
584    #[must_use]
585    pub fn root(&self) -> &Path {
586        &self.root
587    }
588
589    /// Returns the deterministic path used for a URL.
590    #[must_use]
591    pub fn path_for(&self, url: &IndexUrl) -> PathBuf {
592        self.root.join(format!("{}.cache", url.cache_key()))
593    }
594
595    /// Stores a response under its final URL cache key.
596    pub fn store(&self, response: &Response) -> Result<PathBuf, CacheError> {
597        fs::create_dir_all(&self.root).map_err(CacheError::from)?;
598        let path = self.path_for(&response.final_url);
599        fs::write(&path, serialize_response(response)).map_err(CacheError::from)?;
600        Ok(path)
601    }
602
603    /// Loads a response when a cached entry exists for the URL.
604    pub fn load(&self, url: &IndexUrl) -> Result<Option<Response>, CacheError> {
605        let path = self.path_for(url);
606        if !path.exists() {
607            return Ok(None);
608        }
609        let contents = fs::read_to_string(path).map_err(CacheError::from)?;
610        deserialize_response(&contents).map(Some)
611    }
612}
613
614fn serialize_response(response: &Response) -> String {
615    let mut lines = Vec::new();
616    lines.push("index-cache-v1".to_owned());
617    lines.push(format!(
618        "final-url\t{}",
619        escape_field(response.final_url.as_str())
620    ));
621    for redirect in &response.redirects {
622        lines.push(format!("redirect\t{}", escape_field(redirect.as_str())));
623    }
624    lines.push(format!(
625        "mime\t{}",
626        response
627            .mime_type
628            .as_ref()
629            .map_or_else(String::new, |mime| escape_field(mime))
630    ));
631    lines.push(String::new());
632    lines.push(response.body.clone());
633    lines.join("\n")
634}
635
636fn deserialize_response(contents: &str) -> Result<Response, CacheError> {
637    let Some((header, body)) = contents.split_once("\n\n") else {
638        return Err(CacheError::Parse(
639            "cache body separator is missing".to_owned(),
640        ));
641    };
642
643    let mut lines = header.lines();
644    if lines.next() != Some("index-cache-v1") {
645        return Err(CacheError::Parse("missing cache header".to_owned()));
646    }
647
648    let mut final_url = None;
649    let mut redirects = Vec::new();
650    let mut mime_type = None;
651    for line in lines {
652        let fields: Vec<&str> = line.split('\t').collect();
653        match fields.first().copied() {
654            Some("final-url") if fields.len() == 2 => {
655                final_url = Some(parse_cache_url(fields[1])?);
656            }
657            Some("redirect") if fields.len() == 2 => {
658                redirects.push(parse_cache_url(fields[1])?);
659            }
660            Some("mime") if fields.len() == 2 => {
661                let mime = unescape_field(fields[1]).map_err(CacheError::Parse)?;
662                if !mime.is_empty() {
663                    mime_type = Some(mime);
664                }
665            }
666            _ => return Err(CacheError::Parse("invalid cache record".to_owned())),
667        }
668    }
669
670    Ok(Response {
671        final_url: final_url.ok_or_else(|| CacheError::Parse("missing final URL".to_owned()))?,
672        redirects,
673        mime_type,
674        body: body.to_owned(),
675    })
676}
677
678fn parse_cache_url(input: &str) -> Result<IndexUrl, CacheError> {
679    let unescaped = unescape_field(input).map_err(CacheError::Parse)?;
680    IndexUrl::parse(unescaped).map_err(CacheError::Url)
681}
682
/// Escapes a record value so it fits on one tab-separated line.
///
/// Backslash, tab, line feed, and carriage return become `\\`, `\t`, `\n`,
/// and `\r` (two characters each); every other character is copied unchanged.
fn escape_field(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut out, ch| {
            match ch {
                '\\' => out.push_str("\\\\"),
                '\t' => out.push_str("\\t"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                plain => out.push(plain),
            }
            out
        })
}
696
/// Reverses `escape_field`: turns `\\`, `\t`, `\n`, and `\r` back into the
/// characters they stand for and copies everything else unchanged.
///
/// # Errors
///
/// Returns `"dangling escape"` when the input ends right after a backslash,
/// and `"unknown escape: <c>"` when a backslash is followed by any other character.
fn unescape_field(input: &str) -> Result<String, String> {
    let mut decoded = String::with_capacity(input.len());
    // True while the previous character was an unconsumed backslash.
    let mut after_backslash = false;

    for ch in input.chars() {
        if after_backslash {
            let literal = match ch {
                '\\' => '\\',
                't' => '\t',
                'n' => '\n',
                'r' => '\r',
                other => return Err(format!("unknown escape: {other}")),
            };
            decoded.push(literal);
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else {
            decoded.push(ch);
        }
    }

    if after_backslash {
        Err("dangling escape".to_owned())
    } else {
        Ok(decoded)
    }
}
719
720#[cfg(test)]
721mod tests {
722    use std::cell::RefCell;
723    use std::collections::VecDeque;
724    use std::time::{SystemTime, UNIX_EPOCH};
725
726    use index_core::{Form, FormSubmission, IndexUrl, Input};
727
728    use super::{
729        CacheFallbackFetcher, FetchError, Fetcher, FileCache, FormSubmitter, MemoryFetcher,
730        Request, Response, RetryPolicy, RetryingFetcher, SecureFetcher, UreqFetcher,
731        form_submission_key, unescape_field, validate_text_mime,
732    };
733    use index_security::{ContentLimits, SecurityError};
734
735    #[test]
736    fn memory_fetcher_returns_registered_response() -> Result<(), Box<dyn std::error::Error>> {
737        let url = IndexUrl::parse("https://example.com")?;
738        let mut fetcher = MemoryFetcher::new();
739        fetcher.insert(url.clone(), "<title>Hello</title>");
740
741        let response = fetcher.fetch(&Request { url });
742        assert!(response.is_ok());
743        assert_eq!(
744            response.map(|r| r.body),
745            Ok("<title>Hello</title>".to_owned())
746        );
747        Ok(())
748    }
749
750    #[test]
751    fn memory_fetcher_returns_not_found_for_unknown_url() -> Result<(), Box<dyn std::error::Error>>
752    {
753        let known = IndexUrl::parse("https://example.com")?;
754        let unknown = IndexUrl::parse("https://example.com/missing")?;
755        let mut fetcher = MemoryFetcher::new();
756        fetcher.insert(known, "<title>Hello</title>");
757
758        let response = fetcher.fetch(&Request { url: unknown });
759        assert_eq!(
760            response,
761            Err(FetchError::NotFound(
762                "https://example.com/missing".to_owned()
763            ))
764        );
765        Ok(())
766    }
767
768    #[test]
769    fn fetch_error_messages_are_actionable() {
770        let not_found = FetchError::NotFound("https://example.com/nope".to_owned()).to_string();
771        assert!(not_found.contains("https://example.com/nope"));
772
773        let network = FetchError::NetworkNotImplemented.to_string();
774        assert!(network.contains("not implemented"));
775
776        let network = FetchError::Network("connection refused".to_owned()).to_string();
777        assert!(network.contains("connection refused"));
778
779        let timeout = FetchError::Timeout { timeout_ms: 2500 }.to_string();
780        assert!(timeout.contains("2500ms"));
781
782        let status = FetchError::HttpStatus {
783            status: 503,
784            url: "https://example.com".to_owned(),
785        }
786        .to_string();
787        assert!(status.contains("503"));
788
789        let mime = FetchError::UnsupportedContentType("image/png".to_owned()).to_string();
790        assert!(mime.contains("image/png"));
791
792        let cache = FetchError::Cache("bad record".to_owned()).to_string();
793        assert!(cache.contains("bad record"));
794
795        let url = FetchError::Url(index_core::UrlError::MissingScheme).to_string();
796        assert!(url.contains("missing a scheme"));
797
798        let security = FetchError::Security(SecurityError::ContentTooLarge {
799            actual_bytes: 5,
800            limit_bytes: 4,
801        })
802        .to_string();
803        assert!(security.contains("security policy"));
804    }
805
806    #[test]
807    fn fetch_errors_classify_transient_failures() {
808        assert!(FetchError::Network("dns lookup failed".to_owned()).is_transient());
809        assert!(FetchError::Timeout { timeout_ms: 1000 }.is_transient());
810        assert!(
811            !FetchError::HttpStatus {
812                status: 404,
813                url: "https://example.com".to_owned()
814            }
815            .is_transient()
816        );
817        assert!(!FetchError::UnsupportedContentType("image/png".to_owned()).is_transient());
818    }
819
820    #[test]
821    fn memory_fetcher_tracks_redirects() -> Result<(), Box<dyn std::error::Error>> {
822        let requested = IndexUrl::parse("http://example.com")?;
823        let hop = IndexUrl::parse("https://example.com")?;
824        let final_url = IndexUrl::parse("https://www.example.com")?;
825        let mut fetcher = MemoryFetcher::new();
826        fetcher.insert_redirect(
827            requested.clone(),
828            final_url.clone(),
829            vec![hop.clone()],
830            "<title>Moved</title>",
831        );
832
833        let response = fetcher.fetch(&Request { url: requested })?;
834
835        assert_eq!(response.final_url, final_url);
836        assert_eq!(response.redirects, vec![hop]);
837        Ok(())
838    }
839
840    #[test]
841    fn memory_fetcher_submits_get_forms_via_resolved_action()
842    -> Result<(), Box<dyn std::error::Error>> {
843        let action = IndexUrl::parse("https://example.com/search")?;
844        let form = Form {
845            name: "search".to_owned(),
846            method: "GET".to_owned(),
847            action: action.as_str().to_owned(),
848            inputs: vec![Input {
849                name: "q".to_owned(),
850                kind: "search".to_owned(),
851                value: Some("index".to_owned()),
852                required: true,
853            }],
854            buttons: Vec::new(),
855        };
856        let submission = form.submit(None, &[])?;
857        let mut fetcher = MemoryFetcher::new();
858        fetcher.insert(submission.action.clone(), "<title>Search results</title>");
859
860        let response = fetcher.submit_form(&submission)?;
861
862        assert_eq!(response.final_url, submission.action);
863        assert!(response.body.contains("Search results"));
864        Ok(())
865    }
866
867    #[test]
868    fn memory_fetcher_submits_post_forms_by_method_url_and_body()
869    -> Result<(), Box<dyn std::error::Error>> {
870        let form = Form {
871            name: "login".to_owned(),
872            method: "POST".to_owned(),
873            action: "https://example.com/login".to_owned(),
874            inputs: vec![Input {
875                name: "user".to_owned(),
876                kind: "text".to_owned(),
877                value: Some("index".to_owned()),
878                required: true,
879            }],
880            buttons: Vec::new(),
881        };
882        let submission = form.submit(None, &[])?;
883        let mut fetcher = MemoryFetcher::new();
884        fetcher.insert_form_response(
885            &submission,
886            Response {
887                final_url: submission.action.clone(),
888                redirects: Vec::new(),
889                mime_type: Some("text/html".to_owned()),
890                body: "<title>Logged in</title>".to_owned(),
891            },
892        );
893
894        let response = fetcher.submit_form(&submission)?;
895
896        assert_eq!(response.body, "<title>Logged in</title>");
897        Ok(())
898    }
899
900    #[test]
901    fn memory_fetcher_reports_missing_post_form_submission()
902    -> Result<(), Box<dyn std::error::Error>> {
903        let form = Form {
904            name: "login".to_owned(),
905            method: "POST".to_owned(),
906            action: "https://example.com/login".to_owned(),
907            inputs: vec![Input {
908                name: "user".to_owned(),
909                kind: "text".to_owned(),
910                value: Some("index".to_owned()),
911                required: true,
912            }],
913            buttons: Vec::new(),
914        };
915        let submission = form.submit(None, &[])?;
916        let fetcher = MemoryFetcher::new();
917
918        assert_eq!(
919            fetcher.submit_form(&submission),
920            Err(FetchError::NotFound(
921                "POST\thttps://example.com/login\tuser=index".to_owned()
922            ))
923        );
924        Ok(())
925    }
926
927    #[test]
928    fn memory_fetcher_returns_registered_form_error() -> Result<(), Box<dyn std::error::Error>> {
929        let form = Form {
930            name: "login".to_owned(),
931            method: "POST".to_owned(),
932            action: "https://example.com/login".to_owned(),
933            inputs: vec![Input {
934                name: "user".to_owned(),
935                kind: "text".to_owned(),
936                value: Some("index".to_owned()),
937                required: true,
938            }],
939            buttons: Vec::new(),
940        };
941        let submission = form.submit(None, &[])?;
942        let mut fetcher = MemoryFetcher::new();
943        fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 750 });
944
945        assert_eq!(
946            fetcher.submit_form(&submission),
947            Err(FetchError::Timeout { timeout_ms: 750 })
948        );
949        Ok(())
950    }
951
952    #[test]
953    fn ureq_fetcher_can_be_constructed_without_network_io() {
954        let _fetcher = UreqFetcher::new();
955    }
956
957    #[test]
958    fn defaults_and_accessors_are_stable() {
959        let policy = RetryPolicy::default();
960        assert_eq!(policy.max_attempts(), 3);
961        assert_eq!(policy.backoff_millis(), vec![0, 100, 250]);
962
963        let clamped = RetryPolicy::new(0, [7_u64]);
964        assert_eq!(clamped.max_attempts(), 1);
965        assert_eq!(clamped.backoff_millis(), vec![7]);
966
967        let fetcher = MemoryFetcher::new();
968        let secure = SecureFetcher::new(fetcher);
969        let _inner: &MemoryFetcher = secure.inner();
970
971        let retrying = RetryingFetcher::new(MemoryFetcher::new());
972        assert_eq!(retrying.policy().max_attempts(), 3);
973
974        let _default_fetcher = UreqFetcher::default();
975    }
976
977    #[test]
978    fn validate_text_mime_accepts_none_and_rejects_unknown_types() {
979        assert_eq!(validate_text_mime(None), Ok(()));
980        assert_eq!(validate_text_mime(Some("text/html; charset=utf-8")), Ok(()));
981        assert_eq!(
982            validate_text_mime(Some("image/png")),
983            Err(FetchError::UnsupportedContentType("image/png".to_owned()))
984        );
985    }
986
987    #[test]
988    fn secure_fetcher_rejects_large_response() -> Result<(), Box<dyn std::error::Error>> {
989        let url = IndexUrl::parse("https://example.com/large")?;
990        let mut fetcher = MemoryFetcher::new();
991        fetcher.insert(url.clone(), "12345");
992        let secure = SecureFetcher::with_limits(fetcher, ContentLimits::new(4, 100, 20, 10));
993
994        assert_eq!(
995            secure.fetch(&Request { url }),
996            Err(FetchError::Security(SecurityError::ContentTooLarge {
997                actual_bytes: 5,
998                limit_bytes: 4
999            }))
1000        );
1001        Ok(())
1002    }
1003
1004    #[test]
1005    fn secure_fetcher_rejects_unsupported_content_type() -> Result<(), Box<dyn std::error::Error>> {
1006        let url = IndexUrl::parse("https://example.com/image")?;
1007        let mut fetcher = MemoryFetcher::new();
1008        fetcher.insert_response(
1009            url.clone(),
1010            Response {
1011                final_url: url.clone(),
1012                redirects: Vec::new(),
1013                mime_type: Some("image/png".to_owned()),
1014                body: "not really text".to_owned(),
1015            },
1016        );
1017        let secure = SecureFetcher::new(fetcher);
1018
1019        assert_eq!(
1020            secure.fetch(&Request { url }),
1021            Err(FetchError::UnsupportedContentType("image/png".to_owned()))
1022        );
1023        Ok(())
1024    }
1025
1026    #[test]
1027    fn secure_fetcher_validates_form_submission_responses() -> Result<(), Box<dyn std::error::Error>>
1028    {
1029        let form = Form {
1030            name: "comment".to_owned(),
1031            method: "POST".to_owned(),
1032            action: "https://example.com/comment".to_owned(),
1033            inputs: vec![Input {
1034                name: "body".to_owned(),
1035                kind: "text".to_owned(),
1036                value: Some("hello".to_owned()),
1037                required: true,
1038            }],
1039            buttons: Vec::new(),
1040        };
1041        let submission = form.submit(None, &[])?;
1042        let mut fetcher = MemoryFetcher::new();
1043        fetcher.insert_form_response(
1044            &submission,
1045            Response {
1046                final_url: submission.action.clone(),
1047                redirects: Vec::new(),
1048                mime_type: Some("text/plain; charset=utf-8".to_owned()),
1049                body: "accepted".to_owned(),
1050            },
1051        );
1052        let secure = SecureFetcher::new(fetcher);
1053
1054        let response = secure.submit_form(&submission)?;
1055
1056        assert_eq!(response.body, "accepted");
1057        Ok(())
1058    }
1059
1060    #[test]
1061    fn secure_fetcher_rejects_unsupported_form_response_type()
1062    -> Result<(), Box<dyn std::error::Error>> {
1063        let form = Form {
1064            name: "upload".to_owned(),
1065            method: "POST".to_owned(),
1066            action: "https://example.com/upload".to_owned(),
1067            inputs: Vec::new(),
1068            buttons: Vec::new(),
1069        };
1070        let submission = form.submit(None, &[])?;
1071        let mut fetcher = MemoryFetcher::new();
1072        fetcher.insert_form_response(
1073            &submission,
1074            Response {
1075                final_url: submission.action.clone(),
1076                redirects: Vec::new(),
1077                mime_type: Some("image/png".to_owned()),
1078                body: "not text".to_owned(),
1079            },
1080        );
1081        let secure = SecureFetcher::new(fetcher);
1082
1083        assert_eq!(
1084            secure.submit_form(&submission),
1085            Err(FetchError::UnsupportedContentType("image/png".to_owned()))
1086        );
1087        Ok(())
1088    }
1089
1090    #[test]
1091    fn secure_fetcher_rejects_large_form_response() -> Result<(), Box<dyn std::error::Error>> {
1092        let form = Form {
1093            name: "preview".to_owned(),
1094            method: "POST".to_owned(),
1095            action: "https://example.com/preview".to_owned(),
1096            inputs: Vec::new(),
1097            buttons: Vec::new(),
1098        };
1099        let submission = form.submit(None, &[])?;
1100        let mut fetcher = MemoryFetcher::new();
1101        fetcher.insert_form_response(
1102            &submission,
1103            Response {
1104                final_url: submission.action.clone(),
1105                redirects: Vec::new(),
1106                mime_type: Some("text/html".to_owned()),
1107                body: "12345".to_owned(),
1108            },
1109        );
1110        let secure = SecureFetcher::with_limits(fetcher, ContentLimits::new(4, 100, 20, 10));
1111
1112        assert_eq!(
1113            secure.submit_form(&submission),
1114            Err(FetchError::Security(SecurityError::ContentTooLarge {
1115                actual_bytes: 5,
1116                limit_bytes: 4
1117            }))
1118        );
1119        Ok(())
1120    }
1121
1122    #[derive(Debug)]
1123    struct SequenceFetcher {
1124        responses: RefCell<VecDeque<Result<Response, FetchError>>>,
1125    }
1126
1127    impl SequenceFetcher {
1128        fn new(responses: impl IntoIterator<Item = Result<Response, FetchError>>) -> Self {
1129            Self {
1130                responses: RefCell::new(responses.into_iter().collect()),
1131            }
1132        }
1133    }
1134
1135    impl Fetcher for SequenceFetcher {
1136        fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
1137            self.responses
1138                .borrow_mut()
1139                .pop_front()
1140                .unwrap_or_else(|| Err(FetchError::NotFound(request.url.as_str().to_owned())))
1141        }
1142    }
1143
1144    #[derive(Debug)]
1145    struct SequenceSubmitter {
1146        responses: RefCell<VecDeque<Result<Response, FetchError>>>,
1147    }
1148
1149    impl SequenceSubmitter {
1150        fn new(responses: impl IntoIterator<Item = Result<Response, FetchError>>) -> Self {
1151            Self {
1152                responses: RefCell::new(responses.into_iter().collect()),
1153            }
1154        }
1155    }
1156
1157    impl FormSubmitter for SequenceSubmitter {
1158        fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
1159            self.responses
1160                .borrow_mut()
1161                .pop_front()
1162                .unwrap_or_else(|| Err(FetchError::NotFound(form_submission_key(submission))))
1163        }
1164    }
1165
1166    #[test]
1167    fn retrying_fetcher_retries_transient_failures_without_sleeping()
1168    -> Result<(), Box<dyn std::error::Error>> {
1169        let url = IndexUrl::parse("https://example.com/retry")?;
1170        let response = Response {
1171            final_url: url.clone(),
1172            redirects: Vec::new(),
1173            mime_type: Some("text/html".to_owned()),
1174            body: "<title>Retried</title>".to_owned(),
1175        };
1176        let fetcher = SequenceFetcher::new([
1177            Err(FetchError::Timeout { timeout_ms: 1000 }),
1178            Ok(response.clone()),
1179        ]);
1180        let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
1181
1182        assert_eq!(retrying.policy().backoff_millis(), vec![0, 10]);
1183        assert_eq!(retrying.fetch(&Request { url })?, response);
1184        Ok(())
1185    }
1186
1187    #[test]
1188    fn retrying_fetcher_retries_form_submissions() -> Result<(), Box<dyn std::error::Error>> {
1189        let form = Form {
1190            name: "post".to_owned(),
1191            method: "POST".to_owned(),
1192            action: "https://example.com/post".to_owned(),
1193            inputs: Vec::new(),
1194            buttons: Vec::new(),
1195        };
1196        let submission = form.submit(None, &[])?;
1197        let response = Response {
1198            final_url: submission.action.clone(),
1199            redirects: Vec::new(),
1200            mime_type: Some("text/html".to_owned()),
1201            body: "<title>Submitted</title>".to_owned(),
1202        };
1203        let fetcher = SequenceSubmitter::new([
1204            Err(FetchError::Timeout { timeout_ms: 1000 }),
1205            Ok(response.clone()),
1206        ]);
1207        let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
1208
1209        assert_eq!(retrying.submit_form(&submission)?, response);
1210        Ok(())
1211    }
1212
1213    #[test]
1214    fn retrying_form_submitter_does_not_retry_non_transient_failures()
1215    -> Result<(), Box<dyn std::error::Error>> {
1216        let form = Form {
1217            name: "post".to_owned(),
1218            method: "POST".to_owned(),
1219            action: "https://example.com/post".to_owned(),
1220            inputs: Vec::new(),
1221            buttons: Vec::new(),
1222        };
1223        let submission = form.submit(None, &[])?;
1224        let submitter = SequenceSubmitter::new([
1225            Err(FetchError::HttpStatus {
1226                status: 401,
1227                url: submission.action.as_str().to_owned(),
1228            }),
1229            Ok(Response {
1230                final_url: submission.action.clone(),
1231                redirects: Vec::new(),
1232                mime_type: Some("text/html".to_owned()),
1233                body: "<title>Should not happen</title>".to_owned(),
1234            }),
1235        ]);
1236        let retrying = RetryingFetcher::with_policy(submitter, RetryPolicy::new(2, [0, 10]));
1237
1238        assert_eq!(
1239            retrying.submit_form(&submission),
1240            Err(FetchError::HttpStatus {
1241                status: 401,
1242                url: submission.action.as_str().to_owned(),
1243            })
1244        );
1245        Ok(())
1246    }
1247
1248    #[test]
1249    fn retrying_fetcher_does_not_retry_non_transient_failures()
1250    -> Result<(), Box<dyn std::error::Error>> {
1251        let url = IndexUrl::parse("https://example.com/missing")?;
1252        let response = Response {
1253            final_url: url.clone(),
1254            redirects: Vec::new(),
1255            mime_type: Some("text/html".to_owned()),
1256            body: "<title>Should not happen</title>".to_owned(),
1257        };
1258        let fetcher = SequenceFetcher::new([
1259            Err(FetchError::HttpStatus {
1260                status: 404,
1261                url: url.as_str().to_owned(),
1262            }),
1263            Ok(response),
1264        ]);
1265        let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
1266
1267        assert_eq!(
1268            retrying.fetch(&Request { url }),
1269            Err(FetchError::HttpStatus {
1270                status: 404,
1271                url: "https://example.com/missing".to_owned(),
1272            })
1273        );
1274        Ok(())
1275    }
1276
1277    #[test]
1278    fn cache_fallback_reuses_cached_response_after_transient_failure()
1279    -> Result<(), Box<dyn std::error::Error>> {
1280        let root = temp_path("cache-fallback");
1281        let cache = FileCache::new(&root);
1282        let url = IndexUrl::parse("https://example.com/cached")?;
1283        let cached = Response {
1284            final_url: url.clone(),
1285            redirects: Vec::new(),
1286            mime_type: Some("text/html".to_owned()),
1287            body: "<title>Cached</title>".to_owned(),
1288        };
1289        let path = cache.store(&cached)?;
1290        let mut fetcher = MemoryFetcher::new();
1291        fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
1292        let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());
1293
1294        assert_eq!(fallback.fetch(&Request { url })?, cached);
1295
1296        std::fs::remove_file(path)?;
1297        std::fs::remove_dir(&root)?;
1298        Ok(())
1299    }
1300
1301    #[test]
1302    fn cache_fallback_keeps_successes_and_non_transient_failures()
1303    -> Result<(), Box<dyn std::error::Error>> {
1304        let root = temp_path("cache-success");
1305        let cache = FileCache::new(&root);
1306        let url = IndexUrl::parse("https://example.com/live")?;
1307        let response = Response {
1308            final_url: url.clone(),
1309            redirects: Vec::new(),
1310            mime_type: Some("text/html".to_owned()),
1311            body: "<title>Live</title>".to_owned(),
1312        };
1313        let mut fetcher = MemoryFetcher::new();
1314        fetcher.insert_response(url.clone(), response.clone());
1315        let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());
1316
1317        assert!(fallback.cache().path_for(&url).starts_with(cache.root()));
1318        assert_eq!(fallback.fetch(&Request { url: url.clone() })?, response);
1319        assert_eq!(
1320            cache.load(&url)?.map(|cached| cached.body),
1321            Some("<title>Live</title>".to_owned())
1322        );
1323
1324        let mut missing = MemoryFetcher::new();
1325        missing.insert_error(
1326            url.clone(),
1327            FetchError::HttpStatus {
1328                status: 404,
1329                url: url.as_str().to_owned(),
1330            },
1331        );
1332        let fallback = CacheFallbackFetcher::new(missing, cache.clone());
1333        assert_eq!(
1334            fallback.fetch(&Request { url: url.clone() }),
1335            Err(FetchError::HttpStatus {
1336                status: 404,
1337                url: url.as_str().to_owned()
1338            })
1339        );
1340
1341        if let Some(path) = cache.load(&url)?.map(|_| cache.path_for(&url)) {
1342            std::fs::remove_file(path)?;
1343        }
1344        std::fs::remove_dir(&root)?;
1345        Ok(())
1346    }
1347
1348    #[test]
1349    fn cache_fallback_reuses_cached_form_response_after_transient_failure()
1350    -> Result<(), Box<dyn std::error::Error>> {
1351        let root = temp_path("cache-form-fallback");
1352        let cache = FileCache::new(&root);
1353        let form = Form {
1354            name: "search".to_owned(),
1355            method: "GET".to_owned(),
1356            action: "https://example.com/search?q=index".to_owned(),
1357            inputs: Vec::new(),
1358            buttons: Vec::new(),
1359        };
1360        let submission = form.submit(None, &[])?;
1361        let cached = Response {
1362            final_url: submission.action.clone(),
1363            redirects: Vec::new(),
1364            mime_type: Some("text/html".to_owned()),
1365            body: "<title>Cached search</title>".to_owned(),
1366        };
1367        let path = cache.store(&cached)?;
1368        let mut fetcher = MemoryFetcher::new();
1369        fetcher.insert_error(
1370            submission.action.clone(),
1371            FetchError::Timeout { timeout_ms: 500 },
1372        );
1373        let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());
1374
1375        assert_eq!(fallback.submit_form(&submission)?, cached);
1376
1377        std::fs::remove_file(path)?;
1378        std::fs::remove_dir(&root)?;
1379        Ok(())
1380    }
1381
1382    #[test]
1383    fn cache_fallback_returns_transient_error_when_cache_is_missing()
1384    -> Result<(), Box<dyn std::error::Error>> {
1385        let root = temp_path("cache-miss-fallback");
1386        let cache = FileCache::new(&root);
1387        let url = IndexUrl::parse("https://example.com/miss")?;
1388        let mut fetcher = MemoryFetcher::new();
1389        fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
1390        let fallback = CacheFallbackFetcher::new(fetcher, cache);
1391
1392        assert_eq!(
1393            fallback.fetch(&Request { url }),
1394            Err(FetchError::Timeout { timeout_ms: 500 })
1395        );
1396        Ok(())
1397    }
1398
1399    #[test]
1400    fn cache_fallback_reports_cache_parse_errors() -> Result<(), Box<dyn std::error::Error>> {
1401        let root = temp_path("cache-parse-error");
1402        let cache = FileCache::new(&root);
1403        let url = IndexUrl::parse("https://example.com/parse")?;
1404        std::fs::create_dir_all(cache.root())?;
1405        std::fs::write(cache.path_for(&url), "broken-cache-content")?;
1406
1407        let mut fetcher = MemoryFetcher::new();
1408        fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
1409        let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());
1410
1411        let result = fallback.fetch(&Request { url: url.clone() });
1412        assert!(
1413            matches!(result, Err(FetchError::Cache(error)) if error.contains("cache data is invalid"))
1414        );
1415
1416        std::fs::remove_file(cache.path_for(&url))?;
1417        std::fs::remove_dir(cache.root())?;
1418        Ok(())
1419    }
1420
1421    #[test]
1422    fn cache_fallback_form_submitter_reports_cache_errors_and_misses()
1423    -> Result<(), Box<dyn std::error::Error>> {
1424        let form = Form {
1425            name: "search".to_owned(),
1426            method: "POST".to_owned(),
1427            action: "https://example.com/search".to_owned(),
1428            inputs: Vec::new(),
1429            buttons: Vec::new(),
1430        };
1431        let submission = form.submit(None, &[])?;
1432
1433        let miss_root = temp_path("cache-form-miss");
1434        let miss_cache = FileCache::new(&miss_root);
1435        let mut miss_fetcher = MemoryFetcher::new();
1436        miss_fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 250 });
1437        let miss_fallback = CacheFallbackFetcher::new(miss_fetcher, miss_cache);
1438        assert_eq!(
1439            miss_fallback.submit_form(&submission),
1440            Err(FetchError::Timeout { timeout_ms: 250 })
1441        );
1442
1443        let parse_root = temp_path("cache-form-parse");
1444        let parse_cache = FileCache::new(&parse_root);
1445        std::fs::create_dir_all(parse_cache.root())?;
1446        std::fs::write(
1447            parse_cache.path_for(&submission.action),
1448            "broken-cache-content",
1449        )?;
1450        let mut parse_fetcher = MemoryFetcher::new();
1451        parse_fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 250 });
1452        let parse_fallback = CacheFallbackFetcher::new(parse_fetcher, parse_cache.clone());
1453        let parse_result = parse_fallback.submit_form(&submission);
1454        assert!(
1455            matches!(parse_result, Err(FetchError::Cache(error)) if error.contains("cache data is invalid"))
1456        );
1457
1458        std::fs::remove_file(parse_cache.path_for(&submission.action))?;
1459        std::fs::remove_dir(parse_cache.root())?;
1460        Ok(())
1461    }
1462
1463    #[test]
1464    fn secure_fetcher_rejects_redirect_loop() -> Result<(), Box<dyn std::error::Error>> {
1465        let requested = IndexUrl::parse("https://example.com/start")?;
1466        let hop = IndexUrl::parse("https://example.com/hop")?;
1467        let mut fetcher = MemoryFetcher::new();
1468        fetcher.insert_redirect(
1469            requested.clone(),
1470            hop.clone(),
1471            vec![hop.clone()],
1472            "<title>Loop</title>",
1473        );
1474        let secure = SecureFetcher::new(fetcher);
1475
1476        assert_eq!(
1477            secure.fetch(&Request { url: requested }),
1478            Err(FetchError::Security(SecurityError::RedirectLoop {
1479                url: hop
1480            }))
1481        );
1482        Ok(())
1483    }
1484
1485    #[test]
1486    fn file_cache_stores_and_loads_response() -> Result<(), Box<dyn std::error::Error>> {
1487        let root = temp_path("cache");
1488        let cache = FileCache::new(&root);
1489        let url = IndexUrl::parse("https://example.com/docs")?;
1490        let response = Response {
1491            final_url: url.clone(),
1492            redirects: Vec::new(),
1493            mime_type: Some("text/html".to_owned()),
1494            body: "<title>Docs</title>".to_owned(),
1495        };
1496
1497        let path = cache.store(&response)?;
1498        let restored = cache.load(&url)?;
1499        std::fs::remove_file(path)?;
1500        std::fs::remove_dir(&root)?;
1501
1502        assert_eq!(restored, Some(response));
1503        Ok(())
1504    }
1505
1506    #[test]
1507    fn file_cache_returns_none_for_missing_entry() -> Result<(), Box<dyn std::error::Error>> {
1508        let cache = FileCache::new(temp_path("cache-missing"));
1509        let url = IndexUrl::parse("https://example.com/missing")?;
1510
1511        assert_eq!(cache.load(&url)?, None);
1512        assert!(cache.path_for(&url).starts_with(cache.root()));
1513        Ok(())
1514    }
1515
1516    #[test]
1517    fn file_cache_preserves_empty_mime_and_redirects() -> Result<(), Box<dyn std::error::Error>> {
1518        let root = temp_path("cache-redirect");
1519        let cache = FileCache::new(&root);
1520        let requested = IndexUrl::parse("http://example.com")?;
1521        let final_url = IndexUrl::parse("https://example.com")?;
1522        let response = Response {
1523            final_url: final_url.clone(),
1524            redirects: vec![requested],
1525            mime_type: None,
1526            body: "Body\nwith newline".to_owned(),
1527        };
1528
1529        let path = cache.store(&response)?;
1530        let restored = cache.load(&final_url)?;
1531        std::fs::remove_file(path)?;
1532        std::fs::remove_dir(&root)?;
1533
1534        assert_eq!(restored, Some(response));
1535        Ok(())
1536    }
1537
1538    #[test]
1539    fn file_cache_escapes_header_fields() -> Result<(), Box<dyn std::error::Error>> {
1540        let root = temp_path("cache-escaped");
1541        let cache = FileCache::new(&root);
1542        let url = IndexUrl::parse("https://example.com/escaped")?;
1543        let response = Response {
1544            final_url: url.clone(),
1545            redirects: Vec::new(),
1546            mime_type: Some("text/html\twith\\escapes\nand\rreturns".to_owned()),
1547            body: "Body".to_owned(),
1548        };
1549
1550        let path = cache.store(&response)?;
1551        let restored = cache.load(&url)?;
1552        std::fs::remove_file(path)?;
1553        std::fs::remove_dir(&root)?;
1554
1555        assert_eq!(restored, Some(response));
1556        Ok(())
1557    }
1558
1559    #[test]
1560    fn file_cache_rejects_invalid_data() {
1561        assert!(super::deserialize_response("bad").is_err());
1562        assert!(super::deserialize_response("bad\n\nbody").is_err());
1563        assert!(super::deserialize_response("index-cache-v1\nunknown\tvalue\n\nbody").is_err());
1564        assert!(super::deserialize_response("index-cache-v1\nfinal-url\tbad-url\n\nbody").is_err());
1565        assert!(super::deserialize_response("index-cache-v1\nmime\ttext/html\n\nbody").is_err());
1566        assert!(
1567            super::deserialize_response("index-cache-v1\nfinal-url\thttps://example.com\\\n\nbody")
1568                .is_err()
1569        );
1570    }
1571
1572    #[test]
1573    fn unescape_field_rejects_unknown_escape_sequences() {
1574        assert_eq!(unescape_field("\\x"), Err("unknown escape: x".to_owned()));
1575    }
1576
1577    #[test]
1578    fn cache_error_messages_are_actionable() {
1579        let io = super::CacheError::from(std::io::Error::other("disk full")).to_string();
1580        assert!(io.contains("disk full"));
1581
1582        let parse = super::CacheError::Parse("bad record".to_owned()).to_string();
1583        assert!(parse.contains("bad record"));
1584
1585        let url_error = super::CacheError::Url(index_core::UrlError::MissingScheme).to_string();
1586        assert!(url_error.contains("missing a scheme"));
1587    }
1588
1589    #[test]
1590    fn file_cache_paths_use_normalized_url_keys() -> Result<(), Box<dyn std::error::Error>> {
1591        let cache = FileCache::new(temp_path("cache-key"));
1592        let first = IndexUrl::parse("https://EXAMPLE.com:443/docs#first")?;
1593        let second = IndexUrl::parse("https://example.com/docs#second")?;
1594
1595        assert_eq!(cache.path_for(&first), cache.path_for(&second));
1596        Ok(())
1597    }
1598
1599    fn temp_path(name: &str) -> std::path::PathBuf {
1600        let mut path = std::env::temp_dir();
1601        let nanos = SystemTime::now()
1602            .duration_since(UNIX_EPOCH)
1603            .map_or(0, |duration| duration.as_nanos());
1604        path.push(format!("index-http-{name}-{nanos}"));
1605        path
1606    }
1607}