1use std::collections::BTreeMap;
4use std::fmt::{Display, Formatter};
5use std::fs;
6use std::path::{Path, PathBuf};
7use std::time::Duration;
8
9use index_core::{FormMethod, FormSubmission, IndexUrl, UrlError};
10use index_security::{ContentLimits, SecurityError, check_content_size, validate_redirect_chain};
11use ureq::ResponseExt;
12
13#[derive(Debug, Clone, PartialEq, Eq)]
15pub struct Request {
16 pub url: IndexUrl,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct Response {
23 pub final_url: IndexUrl,
25 pub redirects: Vec<IndexUrl>,
27 pub mime_type: Option<String>,
29 pub body: String,
31}
32
33#[derive(Debug, Clone, PartialEq, Eq)]
35pub enum FetchError {
36 NotFound(String),
38 NetworkNotImplemented,
40 Network(String),
42 Timeout {
44 timeout_ms: u64,
46 },
47 HttpStatus {
49 status: u16,
51 url: String,
53 },
54 UnsupportedContentType(String),
56 Url(UrlError),
58 Cache(String),
60 Security(SecurityError),
62}
63
64impl Display for FetchError {
65 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
66 match self {
67 Self::NotFound(url) => write!(f, "no fixture response registered for {url}"),
68 Self::NetworkNotImplemented => f.write_str("network fetching is not implemented yet"),
69 Self::Network(error) => write!(f, "network fetch failed: {error}"),
70 Self::Timeout { timeout_ms } => {
71 write!(f, "network fetch timed out after {timeout_ms}ms")
72 }
73 Self::HttpStatus { status, url } => {
74 write!(f, "HTTP status {status} returned for {url}")
75 }
76 Self::UnsupportedContentType(mime_type) => {
77 write!(f, "unsupported response content type: {mime_type}")
78 }
79 Self::Url(error) => write!(f, "network response URL is invalid: {error}"),
80 Self::Cache(error) => write!(f, "cache fallback failed: {error}"),
81 Self::Security(error) => write!(f, "fetch security policy rejected response: {error}"),
82 }
83 }
84}
85
86impl std::error::Error for FetchError {}
87
88impl FetchError {
89 #[must_use]
91 pub fn is_transient(&self) -> bool {
92 matches!(self, Self::Network(_) | Self::Timeout { .. })
93 }
94}
95
/// How many times an operation may be attempted and the backoff schedule to go with it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RetryPolicy {
    /// Total number of attempts allowed; always at least 1 when built via [`RetryPolicy::new`].
    max_attempts: u8,
    /// Backoff durations, in the order they were supplied.
    backoff: Vec<Duration>,
}

impl RetryPolicy {
    /// Builds a policy from an attempt budget and backoff delays in milliseconds.
    ///
    /// A `max_attempts` of `0` is raised to `1` so that at least one attempt is made.
    #[must_use]
    pub fn new(max_attempts: u8, backoff_millis: impl IntoIterator<Item = u64>) -> Self {
        let backoff = backoff_millis
            .into_iter()
            .map(Duration::from_millis)
            .collect();
        let max_attempts = if max_attempts == 0 { 1 } else { max_attempts };
        Self {
            max_attempts,
            backoff,
        }
    }

    /// Total number of attempts allowed.
    #[must_use]
    pub const fn max_attempts(&self) -> u8 {
        self.max_attempts
    }

    /// Backoff schedule in milliseconds, saturating at `u64::MAX` per entry.
    #[must_use]
    pub fn backoff_millis(&self) -> Vec<u64> {
        let ceiling = u128::from(u64::MAX);
        let mut schedule = Vec::with_capacity(self.backoff.len());
        for delay in &self.backoff {
            let millis = delay.as_millis();
            // The cast cannot truncate: anything above the ceiling is clamped first.
            schedule.push(if millis > ceiling {
                u64::MAX
            } else {
                millis as u64
            });
        }
        schedule
    }
}

impl Default for RetryPolicy {
    /// Three attempts with a backoff schedule of 0 ms, 100 ms and 250 ms.
    fn default() -> Self {
        Self::new(3, [0_u64, 100, 250])
    }
}
137
138#[derive(Debug)]
140pub enum CacheError {
141 Io(std::io::Error),
143 Parse(String),
145 Url(index_core::UrlError),
147}
148
149impl From<std::io::Error> for CacheError {
150 fn from(value: std::io::Error) -> Self {
151 Self::Io(value)
152 }
153}
154
155impl Display for CacheError {
156 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
157 match self {
158 Self::Io(error) => write!(f, "cache IO failed: {error}"),
159 Self::Parse(reason) => write!(f, "cache data is invalid: {reason}"),
160 Self::Url(error) => write!(f, "cache URL is invalid: {error}"),
161 }
162 }
163}
164
165impl std::error::Error for CacheError {}
166
167pub trait Fetcher {
169 fn fetch(&self, request: &Request) -> Result<Response, FetchError>;
171}
172
173pub trait FormSubmitter {
175 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError>;
177}
178
179#[derive(Debug, Clone)]
181pub struct UreqFetcher {
182 agent: ureq::Agent,
183}
184
185impl UreqFetcher {
186 #[must_use]
188 pub fn new() -> Self {
189 let agent: ureq::Agent = ureq::Agent::config_builder()
190 .save_redirect_history(true)
191 .timeout_global(Some(Duration::from_secs(30)))
192 .user_agent("Index/0.1.0")
193 .build()
194 .into();
195 Self { agent }
196 }
197}
198
199impl Default for UreqFetcher {
200 fn default() -> Self {
201 Self::new()
202 }
203}
204
205impl Fetcher for UreqFetcher {
206 fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
207 let mut response = self
208 .agent
209 .get(request.url.as_str())
210 .call()
211 .map_err(|error| FetchError::Network(error.to_string()))?;
212 response_from_ureq(request.url.as_str(), &mut response)
213 }
214}
215
216impl FormSubmitter for UreqFetcher {
217 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
218 match submission.method {
219 FormMethod::Get => self.fetch(&Request {
220 url: submission.action.clone(),
221 }),
222 FormMethod::Post => {
223 let body = submission.body.as_deref().unwrap_or_default();
224 let mut response = self
225 .agent
226 .post(submission.action.as_str())
227 .header("content-type", "application/x-www-form-urlencoded")
228 .send(body)
229 .map_err(|error| FetchError::Network(error.to_string()))?;
230 response_from_ureq(submission.action.as_str(), &mut response)
231 }
232 }
233 }
234}
235
236fn response_from_ureq(
237 requested_url: &str,
238 response: &mut ureq::http::Response<ureq::Body>,
239) -> Result<Response, FetchError> {
240 let final_url = IndexUrl::parse(response.get_uri().to_string()).map_err(FetchError::Url)?;
241 let redirects = response
242 .get_redirect_history()
243 .unwrap_or(&[])
244 .iter()
245 .filter_map(|uri| {
246 let value = uri.to_string();
247 (value != requested_url && value != final_url.as_str()).then_some(value)
248 })
249 .map(IndexUrl::parse)
250 .collect::<Result<Vec<_>, _>>()
251 .map_err(FetchError::Url)?;
252 let mime_type = response
253 .headers()
254 .get("content-type")
255 .and_then(|value| value.to_str().ok())
256 .map(ToOwned::to_owned);
257 let body = response
258 .body_mut()
259 .read_to_string()
260 .map_err(|error| FetchError::Network(error.to_string()))?;
261
262 Ok(Response {
263 final_url,
264 redirects,
265 mime_type,
266 body,
267 })
268}
269
270#[derive(Debug, Clone)]
272pub struct SecureFetcher<F> {
273 inner: F,
274 limits: ContentLimits,
275}
276
277impl<F> SecureFetcher<F> {
278 #[must_use]
280 pub fn new(inner: F) -> Self {
281 Self {
282 inner,
283 limits: ContentLimits::default(),
284 }
285 }
286
287 #[must_use]
289 pub const fn with_limits(inner: F, limits: ContentLimits) -> Self {
290 Self { inner, limits }
291 }
292
293 #[must_use]
295 pub fn inner(&self) -> &F {
296 &self.inner
297 }
298}
299
300impl<F: Fetcher> Fetcher for SecureFetcher<F> {
301 fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
302 let response = self.inner.fetch(request)?;
303 validate_response(&request.url, response, self.limits)
304 }
305}
306
307impl<F: FormSubmitter> FormSubmitter for SecureFetcher<F> {
308 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
309 let response = self.inner.submit_form(submission)?;
310 validate_response(&submission.action, response, self.limits)
311 }
312}
313
314fn validate_response(
315 requested_url: &IndexUrl,
316 response: Response,
317 limits: ContentLimits,
318) -> Result<Response, FetchError> {
319 validate_text_mime(response.mime_type.as_deref())?;
320 validate_redirect_chain(
321 requested_url,
322 &response.redirects,
323 &response.final_url,
324 limits,
325 )
326 .map_err(FetchError::Security)?;
327 check_content_size(&response.body, limits).map_err(FetchError::Security)?;
328 Ok(response)
329}
330
331impl<F: FormSubmitter> FormSubmitter for RetryingFetcher<F> {
332 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
333 let mut attempt = 1;
334 loop {
335 match self.inner.submit_form(submission) {
336 Ok(response) => return Ok(response),
337 Err(error) if error.is_transient() && attempt < self.policy.max_attempts() => {
338 attempt += 1;
339 }
340 Err(error) => return Err(error),
341 }
342 }
343 }
344}
345
346impl<F: FormSubmitter> FormSubmitter for CacheFallbackFetcher<F> {
347 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
348 match self.inner.submit_form(submission) {
349 Ok(response) => {
350 let _ = self.cache.store(&response);
351 Ok(response)
352 }
353 Err(error) if error.is_transient() => match self.cache.load(&submission.action) {
354 Ok(Some(response)) => Ok(response),
355 Ok(None) => Err(error),
356 Err(cache_error) => Err(FetchError::Cache(cache_error.to_string())),
357 },
358 Err(error) => Err(error),
359 }
360 }
361}
362
363fn validate_text_mime(mime_type: Option<&str>) -> Result<(), FetchError> {
364 let Some(mime_type) = mime_type else {
365 return Ok(());
366 };
367 let normalized = mime_type
368 .split(';')
369 .next()
370 .map(str::trim)
371 .unwrap_or_default()
372 .to_ascii_lowercase();
373 match normalized.as_str() {
374 ""
375 | "text/html"
376 | "text/plain"
377 | "application/xhtml+xml"
378 | "application/xml"
379 | "text/xml" => Ok(()),
380 _ => Err(FetchError::UnsupportedContentType(mime_type.to_owned())),
381 }
382}
383
384#[derive(Debug, Clone)]
386pub struct RetryingFetcher<F> {
387 inner: F,
388 policy: RetryPolicy,
389}
390
391impl<F> RetryingFetcher<F> {
392 #[must_use]
394 pub fn new(inner: F) -> Self {
395 Self {
396 inner,
397 policy: RetryPolicy::default(),
398 }
399 }
400
401 #[must_use]
403 pub fn with_policy(inner: F, policy: RetryPolicy) -> Self {
404 Self { inner, policy }
405 }
406
407 #[must_use]
409 pub fn policy(&self) -> &RetryPolicy {
410 &self.policy
411 }
412}
413
414impl<F: Fetcher> Fetcher for RetryingFetcher<F> {
415 fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
416 let mut attempt = 1;
417 loop {
418 match self.inner.fetch(request) {
419 Ok(response) => return Ok(response),
420 Err(error) if error.is_transient() && attempt < self.policy.max_attempts() => {
421 attempt += 1;
422 }
423 Err(error) => return Err(error),
424 }
425 }
426 }
427}
428
429#[derive(Debug, Clone)]
431pub struct CacheFallbackFetcher<F> {
432 inner: F,
433 cache: FileCache,
434}
435
436impl<F> CacheFallbackFetcher<F> {
437 #[must_use]
439 pub fn new(inner: F, cache: FileCache) -> Self {
440 Self { inner, cache }
441 }
442
443 #[must_use]
445 pub fn cache(&self) -> &FileCache {
446 &self.cache
447 }
448}
449
450impl<F: Fetcher> Fetcher for CacheFallbackFetcher<F> {
451 fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
452 match self.inner.fetch(request) {
453 Ok(response) => {
454 let _ = self.cache.store(&response);
455 Ok(response)
456 }
457 Err(error) if error.is_transient() => match self.cache.load(&request.url) {
458 Ok(Some(response)) => Ok(response),
459 Ok(None) => Err(error),
460 Err(cache_error) => Err(FetchError::Cache(cache_error.to_string())),
461 },
462 Err(error) => Err(error),
463 }
464 }
465}
466
467#[derive(Debug, Clone, Default)]
469pub struct MemoryFetcher {
470 responses: BTreeMap<String, Result<Response, FetchError>>,
471 form_responses: BTreeMap<String, Result<Response, FetchError>>,
472}
473
474impl MemoryFetcher {
475 #[must_use]
477 pub fn new() -> Self {
478 Self::default()
479 }
480
481 pub fn insert(&mut self, url: IndexUrl, body: impl Into<String>) {
483 let response = Response {
484 final_url: url.clone(),
485 redirects: Vec::new(),
486 mime_type: Some("text/html".to_owned()),
487 body: body.into(),
488 };
489 self.responses.insert(url.as_str().to_owned(), Ok(response));
490 }
491
492 pub fn insert_response(&mut self, requested_url: IndexUrl, response: Response) {
494 self.responses
495 .insert(requested_url.as_str().to_owned(), Ok(response));
496 }
497
498 pub fn insert_error(&mut self, url: IndexUrl, error: FetchError) {
500 self.responses.insert(url.as_str().to_owned(), Err(error));
501 }
502
503 pub fn insert_form_response(&mut self, submission: &FormSubmission, response: Response) {
505 self.form_responses
506 .insert(form_submission_key(submission), Ok(response));
507 }
508
509 pub fn insert_form_error(&mut self, submission: &FormSubmission, error: FetchError) {
511 self.form_responses
512 .insert(form_submission_key(submission), Err(error));
513 }
514
515 pub fn insert_redirect(
517 &mut self,
518 requested_url: IndexUrl,
519 final_url: IndexUrl,
520 redirects: Vec<IndexUrl>,
521 body: impl Into<String>,
522 ) {
523 let response = Response {
524 final_url,
525 redirects,
526 mime_type: Some("text/html".to_owned()),
527 body: body.into(),
528 };
529 self.responses
530 .insert(requested_url.as_str().to_owned(), Ok(response));
531 }
532}
533
534impl Fetcher for MemoryFetcher {
535 fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
536 match self.responses.get(request.url.as_str()).cloned() {
537 Some(result) => result,
538 None => Err(FetchError::NotFound(request.url.as_str().to_owned())),
539 }
540 }
541}
542
543impl FormSubmitter for MemoryFetcher {
544 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
545 if submission.method == FormMethod::Get {
546 return self.fetch(&Request {
547 url: submission.action.clone(),
548 });
549 }
550 match self
551 .form_responses
552 .get(&form_submission_key(submission))
553 .cloned()
554 {
555 Some(result) => result,
556 None => Err(FetchError::NotFound(form_submission_key(submission))),
557 }
558 }
559}
560
561fn form_submission_key(submission: &FormSubmission) -> String {
562 format!(
563 "{}\t{}\t{}",
564 submission.method.as_str(),
565 submission.action,
566 submission.body.as_deref().unwrap_or_default()
567 )
568}
569
570#[derive(Debug, Clone, PartialEq, Eq)]
572pub struct FileCache {
573 root: PathBuf,
574}
575
576impl FileCache {
577 #[must_use]
579 pub fn new(root: impl Into<PathBuf>) -> Self {
580 Self { root: root.into() }
581 }
582
583 #[must_use]
585 pub fn root(&self) -> &Path {
586 &self.root
587 }
588
589 #[must_use]
591 pub fn path_for(&self, url: &IndexUrl) -> PathBuf {
592 self.root.join(format!("{}.cache", url.cache_key()))
593 }
594
595 pub fn store(&self, response: &Response) -> Result<PathBuf, CacheError> {
597 fs::create_dir_all(&self.root).map_err(CacheError::from)?;
598 let path = self.path_for(&response.final_url);
599 fs::write(&path, serialize_response(response)).map_err(CacheError::from)?;
600 Ok(path)
601 }
602
603 pub fn load(&self, url: &IndexUrl) -> Result<Option<Response>, CacheError> {
605 let path = self.path_for(url);
606 if !path.exists() {
607 return Ok(None);
608 }
609 let contents = fs::read_to_string(path).map_err(CacheError::from)?;
610 deserialize_response(&contents).map(Some)
611 }
612}
613
614fn serialize_response(response: &Response) -> String {
615 let mut lines = Vec::new();
616 lines.push("index-cache-v1".to_owned());
617 lines.push(format!(
618 "final-url\t{}",
619 escape_field(response.final_url.as_str())
620 ));
621 for redirect in &response.redirects {
622 lines.push(format!("redirect\t{}", escape_field(redirect.as_str())));
623 }
624 lines.push(format!(
625 "mime\t{}",
626 response
627 .mime_type
628 .as_ref()
629 .map_or_else(String::new, |mime| escape_field(mime))
630 ));
631 lines.push(String::new());
632 lines.push(response.body.clone());
633 lines.join("\n")
634}
635
636fn deserialize_response(contents: &str) -> Result<Response, CacheError> {
637 let Some((header, body)) = contents.split_once("\n\n") else {
638 return Err(CacheError::Parse(
639 "cache body separator is missing".to_owned(),
640 ));
641 };
642
643 let mut lines = header.lines();
644 if lines.next() != Some("index-cache-v1") {
645 return Err(CacheError::Parse("missing cache header".to_owned()));
646 }
647
648 let mut final_url = None;
649 let mut redirects = Vec::new();
650 let mut mime_type = None;
651 for line in lines {
652 let fields: Vec<&str> = line.split('\t').collect();
653 match fields.first().copied() {
654 Some("final-url") if fields.len() == 2 => {
655 final_url = Some(parse_cache_url(fields[1])?);
656 }
657 Some("redirect") if fields.len() == 2 => {
658 redirects.push(parse_cache_url(fields[1])?);
659 }
660 Some("mime") if fields.len() == 2 => {
661 let mime = unescape_field(fields[1]).map_err(CacheError::Parse)?;
662 if !mime.is_empty() {
663 mime_type = Some(mime);
664 }
665 }
666 _ => return Err(CacheError::Parse("invalid cache record".to_owned())),
667 }
668 }
669
670 Ok(Response {
671 final_url: final_url.ok_or_else(|| CacheError::Parse("missing final URL".to_owned()))?,
672 redirects,
673 mime_type,
674 body: body.to_owned(),
675 })
676}
677
678fn parse_cache_url(input: &str) -> Result<IndexUrl, CacheError> {
679 let unescaped = unescape_field(input).map_err(CacheError::Parse)?;
680 IndexUrl::parse(unescaped).map_err(CacheError::Url)
681}
682
/// Escapes a value so it fits on one tab-separated cache record line.
///
/// Backslash, tab, line feed and carriage return become `\\`, `\t`, `\n` and
/// `\r` (two characters each); every other character is copied unchanged.
fn escape_field(input: &str) -> String {
    input
        .chars()
        .fold(String::with_capacity(input.len()), |mut out, ch| {
            match ch {
                '\\' => out.push_str(r"\\"),
                '\t' => out.push_str(r"\t"),
                '\n' => out.push_str(r"\n"),
                '\r' => out.push_str(r"\r"),
                plain => out.push(plain),
            }
            out
        })
}
696
/// Reverses `escape_field`.
///
/// # Errors
///
/// Returns `"dangling escape"` when the input ends with a lone backslash and
/// `"unknown escape: <c>"` when a backslash is followed by anything other
/// than `\`, `t`, `n` or `r`.
fn unescape_field(input: &str) -> Result<String, String> {
    let mut out = String::with_capacity(input.len());
    // True while the previous character was an unconsumed backslash.
    let mut pending_escape = false;

    for ch in input.chars() {
        if pending_escape {
            let decoded = match ch {
                '\\' => '\\',
                't' => '\t',
                'n' => '\n',
                'r' => '\r',
                other => return Err(format!("unknown escape: {other}")),
            };
            out.push(decoded);
            pending_escape = false;
        } else if ch == '\\' {
            pending_escape = true;
        } else {
            out.push(ch);
        }
    }

    if pending_escape {
        Err("dangling escape".to_owned())
    } else {
        Ok(out)
    }
}
719
720#[cfg(test)]
721mod tests {
722 use std::cell::RefCell;
723 use std::collections::VecDeque;
724 use std::time::{SystemTime, UNIX_EPOCH};
725
726 use index_core::{Form, FormSubmission, IndexUrl, Input};
727
728 use super::{
729 CacheFallbackFetcher, FetchError, Fetcher, FileCache, FormSubmitter, MemoryFetcher,
730 Request, Response, RetryPolicy, RetryingFetcher, SecureFetcher, UreqFetcher,
731 form_submission_key, unescape_field, validate_text_mime,
732 };
733 use index_security::{ContentLimits, SecurityError};
734
735 #[test]
736 fn memory_fetcher_returns_registered_response() -> Result<(), Box<dyn std::error::Error>> {
737 let url = IndexUrl::parse("https://example.com")?;
738 let mut fetcher = MemoryFetcher::new();
739 fetcher.insert(url.clone(), "<title>Hello</title>");
740
741 let response = fetcher.fetch(&Request { url });
742 assert!(response.is_ok());
743 assert_eq!(
744 response.map(|r| r.body),
745 Ok("<title>Hello</title>".to_owned())
746 );
747 Ok(())
748 }
749
750 #[test]
751 fn memory_fetcher_returns_not_found_for_unknown_url() -> Result<(), Box<dyn std::error::Error>>
752 {
753 let known = IndexUrl::parse("https://example.com")?;
754 let unknown = IndexUrl::parse("https://example.com/missing")?;
755 let mut fetcher = MemoryFetcher::new();
756 fetcher.insert(known, "<title>Hello</title>");
757
758 let response = fetcher.fetch(&Request { url: unknown });
759 assert_eq!(
760 response,
761 Err(FetchError::NotFound(
762 "https://example.com/missing".to_owned()
763 ))
764 );
765 Ok(())
766 }
767
768 #[test]
769 fn fetch_error_messages_are_actionable() {
770 let not_found = FetchError::NotFound("https://example.com/nope".to_owned()).to_string();
771 assert!(not_found.contains("https://example.com/nope"));
772
773 let network = FetchError::NetworkNotImplemented.to_string();
774 assert!(network.contains("not implemented"));
775
776 let network = FetchError::Network("connection refused".to_owned()).to_string();
777 assert!(network.contains("connection refused"));
778
779 let timeout = FetchError::Timeout { timeout_ms: 2500 }.to_string();
780 assert!(timeout.contains("2500ms"));
781
782 let status = FetchError::HttpStatus {
783 status: 503,
784 url: "https://example.com".to_owned(),
785 }
786 .to_string();
787 assert!(status.contains("503"));
788
789 let mime = FetchError::UnsupportedContentType("image/png".to_owned()).to_string();
790 assert!(mime.contains("image/png"));
791
792 let cache = FetchError::Cache("bad record".to_owned()).to_string();
793 assert!(cache.contains("bad record"));
794
795 let url = FetchError::Url(index_core::UrlError::MissingScheme).to_string();
796 assert!(url.contains("missing a scheme"));
797
798 let security = FetchError::Security(SecurityError::ContentTooLarge {
799 actual_bytes: 5,
800 limit_bytes: 4,
801 })
802 .to_string();
803 assert!(security.contains("security policy"));
804 }
805
806 #[test]
807 fn fetch_errors_classify_transient_failures() {
808 assert!(FetchError::Network("dns lookup failed".to_owned()).is_transient());
809 assert!(FetchError::Timeout { timeout_ms: 1000 }.is_transient());
810 assert!(
811 !FetchError::HttpStatus {
812 status: 404,
813 url: "https://example.com".to_owned()
814 }
815 .is_transient()
816 );
817 assert!(!FetchError::UnsupportedContentType("image/png".to_owned()).is_transient());
818 }
819
820 #[test]
821 fn memory_fetcher_tracks_redirects() -> Result<(), Box<dyn std::error::Error>> {
822 let requested = IndexUrl::parse("http://example.com")?;
823 let hop = IndexUrl::parse("https://example.com")?;
824 let final_url = IndexUrl::parse("https://www.example.com")?;
825 let mut fetcher = MemoryFetcher::new();
826 fetcher.insert_redirect(
827 requested.clone(),
828 final_url.clone(),
829 vec![hop.clone()],
830 "<title>Moved</title>",
831 );
832
833 let response = fetcher.fetch(&Request { url: requested })?;
834
835 assert_eq!(response.final_url, final_url);
836 assert_eq!(response.redirects, vec![hop]);
837 Ok(())
838 }
839
840 #[test]
841 fn memory_fetcher_submits_get_forms_via_resolved_action()
842 -> Result<(), Box<dyn std::error::Error>> {
843 let action = IndexUrl::parse("https://example.com/search")?;
844 let form = Form {
845 name: "search".to_owned(),
846 method: "GET".to_owned(),
847 action: action.as_str().to_owned(),
848 inputs: vec![Input {
849 name: "q".to_owned(),
850 kind: "search".to_owned(),
851 value: Some("index".to_owned()),
852 required: true,
853 }],
854 buttons: Vec::new(),
855 };
856 let submission = form.submit(None, &[])?;
857 let mut fetcher = MemoryFetcher::new();
858 fetcher.insert(submission.action.clone(), "<title>Search results</title>");
859
860 let response = fetcher.submit_form(&submission)?;
861
862 assert_eq!(response.final_url, submission.action);
863 assert!(response.body.contains("Search results"));
864 Ok(())
865 }
866
867 #[test]
868 fn memory_fetcher_submits_post_forms_by_method_url_and_body()
869 -> Result<(), Box<dyn std::error::Error>> {
870 let form = Form {
871 name: "login".to_owned(),
872 method: "POST".to_owned(),
873 action: "https://example.com/login".to_owned(),
874 inputs: vec![Input {
875 name: "user".to_owned(),
876 kind: "text".to_owned(),
877 value: Some("index".to_owned()),
878 required: true,
879 }],
880 buttons: Vec::new(),
881 };
882 let submission = form.submit(None, &[])?;
883 let mut fetcher = MemoryFetcher::new();
884 fetcher.insert_form_response(
885 &submission,
886 Response {
887 final_url: submission.action.clone(),
888 redirects: Vec::new(),
889 mime_type: Some("text/html".to_owned()),
890 body: "<title>Logged in</title>".to_owned(),
891 },
892 );
893
894 let response = fetcher.submit_form(&submission)?;
895
896 assert_eq!(response.body, "<title>Logged in</title>");
897 Ok(())
898 }
899
900 #[test]
901 fn memory_fetcher_reports_missing_post_form_submission()
902 -> Result<(), Box<dyn std::error::Error>> {
903 let form = Form {
904 name: "login".to_owned(),
905 method: "POST".to_owned(),
906 action: "https://example.com/login".to_owned(),
907 inputs: vec![Input {
908 name: "user".to_owned(),
909 kind: "text".to_owned(),
910 value: Some("index".to_owned()),
911 required: true,
912 }],
913 buttons: Vec::new(),
914 };
915 let submission = form.submit(None, &[])?;
916 let fetcher = MemoryFetcher::new();
917
918 assert_eq!(
919 fetcher.submit_form(&submission),
920 Err(FetchError::NotFound(
921 "POST\thttps://example.com/login\tuser=index".to_owned()
922 ))
923 );
924 Ok(())
925 }
926
927 #[test]
928 fn memory_fetcher_returns_registered_form_error() -> Result<(), Box<dyn std::error::Error>> {
929 let form = Form {
930 name: "login".to_owned(),
931 method: "POST".to_owned(),
932 action: "https://example.com/login".to_owned(),
933 inputs: vec![Input {
934 name: "user".to_owned(),
935 kind: "text".to_owned(),
936 value: Some("index".to_owned()),
937 required: true,
938 }],
939 buttons: Vec::new(),
940 };
941 let submission = form.submit(None, &[])?;
942 let mut fetcher = MemoryFetcher::new();
943 fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 750 });
944
945 assert_eq!(
946 fetcher.submit_form(&submission),
947 Err(FetchError::Timeout { timeout_ms: 750 })
948 );
949 Ok(())
950 }
951
952 #[test]
953 fn ureq_fetcher_can_be_constructed_without_network_io() {
954 let _fetcher = UreqFetcher::new();
955 }
956
957 #[test]
958 fn defaults_and_accessors_are_stable() {
959 let policy = RetryPolicy::default();
960 assert_eq!(policy.max_attempts(), 3);
961 assert_eq!(policy.backoff_millis(), vec![0, 100, 250]);
962
963 let clamped = RetryPolicy::new(0, [7_u64]);
964 assert_eq!(clamped.max_attempts(), 1);
965 assert_eq!(clamped.backoff_millis(), vec![7]);
966
967 let fetcher = MemoryFetcher::new();
968 let secure = SecureFetcher::new(fetcher);
969 let _inner: &MemoryFetcher = secure.inner();
970
971 let retrying = RetryingFetcher::new(MemoryFetcher::new());
972 assert_eq!(retrying.policy().max_attempts(), 3);
973
974 let _default_fetcher = UreqFetcher::default();
975 }
976
977 #[test]
978 fn validate_text_mime_accepts_none_and_rejects_unknown_types() {
979 assert_eq!(validate_text_mime(None), Ok(()));
980 assert_eq!(validate_text_mime(Some("text/html; charset=utf-8")), Ok(()));
981 assert_eq!(
982 validate_text_mime(Some("image/png")),
983 Err(FetchError::UnsupportedContentType("image/png".to_owned()))
984 );
985 }
986
987 #[test]
988 fn secure_fetcher_rejects_large_response() -> Result<(), Box<dyn std::error::Error>> {
989 let url = IndexUrl::parse("https://example.com/large")?;
990 let mut fetcher = MemoryFetcher::new();
991 fetcher.insert(url.clone(), "12345");
992 let secure = SecureFetcher::with_limits(fetcher, ContentLimits::new(4, 100, 20, 10));
993
994 assert_eq!(
995 secure.fetch(&Request { url }),
996 Err(FetchError::Security(SecurityError::ContentTooLarge {
997 actual_bytes: 5,
998 limit_bytes: 4
999 }))
1000 );
1001 Ok(())
1002 }
1003
1004 #[test]
1005 fn secure_fetcher_rejects_unsupported_content_type() -> Result<(), Box<dyn std::error::Error>> {
1006 let url = IndexUrl::parse("https://example.com/image")?;
1007 let mut fetcher = MemoryFetcher::new();
1008 fetcher.insert_response(
1009 url.clone(),
1010 Response {
1011 final_url: url.clone(),
1012 redirects: Vec::new(),
1013 mime_type: Some("image/png".to_owned()),
1014 body: "not really text".to_owned(),
1015 },
1016 );
1017 let secure = SecureFetcher::new(fetcher);
1018
1019 assert_eq!(
1020 secure.fetch(&Request { url }),
1021 Err(FetchError::UnsupportedContentType("image/png".to_owned()))
1022 );
1023 Ok(())
1024 }
1025
1026 #[test]
1027 fn secure_fetcher_validates_form_submission_responses() -> Result<(), Box<dyn std::error::Error>>
1028 {
1029 let form = Form {
1030 name: "comment".to_owned(),
1031 method: "POST".to_owned(),
1032 action: "https://example.com/comment".to_owned(),
1033 inputs: vec![Input {
1034 name: "body".to_owned(),
1035 kind: "text".to_owned(),
1036 value: Some("hello".to_owned()),
1037 required: true,
1038 }],
1039 buttons: Vec::new(),
1040 };
1041 let submission = form.submit(None, &[])?;
1042 let mut fetcher = MemoryFetcher::new();
1043 fetcher.insert_form_response(
1044 &submission,
1045 Response {
1046 final_url: submission.action.clone(),
1047 redirects: Vec::new(),
1048 mime_type: Some("text/plain; charset=utf-8".to_owned()),
1049 body: "accepted".to_owned(),
1050 },
1051 );
1052 let secure = SecureFetcher::new(fetcher);
1053
1054 let response = secure.submit_form(&submission)?;
1055
1056 assert_eq!(response.body, "accepted");
1057 Ok(())
1058 }
1059
1060 #[test]
1061 fn secure_fetcher_rejects_unsupported_form_response_type()
1062 -> Result<(), Box<dyn std::error::Error>> {
1063 let form = Form {
1064 name: "upload".to_owned(),
1065 method: "POST".to_owned(),
1066 action: "https://example.com/upload".to_owned(),
1067 inputs: Vec::new(),
1068 buttons: Vec::new(),
1069 };
1070 let submission = form.submit(None, &[])?;
1071 let mut fetcher = MemoryFetcher::new();
1072 fetcher.insert_form_response(
1073 &submission,
1074 Response {
1075 final_url: submission.action.clone(),
1076 redirects: Vec::new(),
1077 mime_type: Some("image/png".to_owned()),
1078 body: "not text".to_owned(),
1079 },
1080 );
1081 let secure = SecureFetcher::new(fetcher);
1082
1083 assert_eq!(
1084 secure.submit_form(&submission),
1085 Err(FetchError::UnsupportedContentType("image/png".to_owned()))
1086 );
1087 Ok(())
1088 }
1089
1090 #[test]
1091 fn secure_fetcher_rejects_large_form_response() -> Result<(), Box<dyn std::error::Error>> {
1092 let form = Form {
1093 name: "preview".to_owned(),
1094 method: "POST".to_owned(),
1095 action: "https://example.com/preview".to_owned(),
1096 inputs: Vec::new(),
1097 buttons: Vec::new(),
1098 };
1099 let submission = form.submit(None, &[])?;
1100 let mut fetcher = MemoryFetcher::new();
1101 fetcher.insert_form_response(
1102 &submission,
1103 Response {
1104 final_url: submission.action.clone(),
1105 redirects: Vec::new(),
1106 mime_type: Some("text/html".to_owned()),
1107 body: "12345".to_owned(),
1108 },
1109 );
1110 let secure = SecureFetcher::with_limits(fetcher, ContentLimits::new(4, 100, 20, 10));
1111
1112 assert_eq!(
1113 secure.submit_form(&submission),
1114 Err(FetchError::Security(SecurityError::ContentTooLarge {
1115 actual_bytes: 5,
1116 limit_bytes: 4
1117 }))
1118 );
1119 Ok(())
1120 }
1121
1122 #[derive(Debug)]
1123 struct SequenceFetcher {
1124 responses: RefCell<VecDeque<Result<Response, FetchError>>>,
1125 }
1126
1127 impl SequenceFetcher {
1128 fn new(responses: impl IntoIterator<Item = Result<Response, FetchError>>) -> Self {
1129 Self {
1130 responses: RefCell::new(responses.into_iter().collect()),
1131 }
1132 }
1133 }
1134
1135 impl Fetcher for SequenceFetcher {
1136 fn fetch(&self, request: &Request) -> Result<Response, FetchError> {
1137 self.responses
1138 .borrow_mut()
1139 .pop_front()
1140 .unwrap_or_else(|| Err(FetchError::NotFound(request.url.as_str().to_owned())))
1141 }
1142 }
1143
1144 #[derive(Debug)]
1145 struct SequenceSubmitter {
1146 responses: RefCell<VecDeque<Result<Response, FetchError>>>,
1147 }
1148
1149 impl SequenceSubmitter {
1150 fn new(responses: impl IntoIterator<Item = Result<Response, FetchError>>) -> Self {
1151 Self {
1152 responses: RefCell::new(responses.into_iter().collect()),
1153 }
1154 }
1155 }
1156
1157 impl FormSubmitter for SequenceSubmitter {
1158 fn submit_form(&self, submission: &FormSubmission) -> Result<Response, FetchError> {
1159 self.responses
1160 .borrow_mut()
1161 .pop_front()
1162 .unwrap_or_else(|| Err(FetchError::NotFound(form_submission_key(submission))))
1163 }
1164 }
1165
1166 #[test]
1167 fn retrying_fetcher_retries_transient_failures_without_sleeping()
1168 -> Result<(), Box<dyn std::error::Error>> {
1169 let url = IndexUrl::parse("https://example.com/retry")?;
1170 let response = Response {
1171 final_url: url.clone(),
1172 redirects: Vec::new(),
1173 mime_type: Some("text/html".to_owned()),
1174 body: "<title>Retried</title>".to_owned(),
1175 };
1176 let fetcher = SequenceFetcher::new([
1177 Err(FetchError::Timeout { timeout_ms: 1000 }),
1178 Ok(response.clone()),
1179 ]);
1180 let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));
1181
1182 assert_eq!(retrying.policy().backoff_millis(), vec![0, 10]);
1183 assert_eq!(retrying.fetch(&Request { url })?, response);
1184 Ok(())
1185 }
1186
// Form submissions get the same retry treatment: a timeout followed by a
// success must yield the successful response.
#[test]
fn retrying_fetcher_retries_form_submissions() -> Result<(), Box<dyn std::error::Error>> {
    let form = Form {
        name: "post".to_owned(),
        method: "POST".to_owned(),
        action: "https://example.com/post".to_owned(),
        inputs: Vec::new(),
        buttons: Vec::new(),
    };
    let submission = form.submit(None, &[])?;

    let expected = Response {
        final_url: submission.action.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Submitted</title>".to_owned(),
    };

    // First submission times out, second one succeeds.
    let scripted = vec![
        Err(FetchError::Timeout { timeout_ms: 1000 }),
        Ok(expected.clone()),
    ];
    let policy = RetryPolicy::new(2, [0, 10]);
    let retrying = RetryingFetcher::with_policy(SequenceSubmitter::new(scripted), policy);

    let submitted = retrying.submit_form(&submission)?;
    assert_eq!(submitted, expected);
    Ok(())
}
1212
// An HTTP 401 on a form submission must be returned as-is; the success queued
// behind it must never be reached.
#[test]
fn retrying_form_submitter_does_not_retry_non_transient_failures()
-> Result<(), Box<dyn std::error::Error>> {
    let form = Form {
        name: "post".to_owned(),
        method: "POST".to_owned(),
        action: "https://example.com/post".to_owned(),
        inputs: Vec::new(),
        buttons: Vec::new(),
    };
    let submission = form.submit(None, &[])?;

    let unauthorized = FetchError::HttpStatus {
        status: 401,
        url: submission.action.as_str().to_owned(),
    };
    // Only reachable if the wrapper wrongly retries after the 401.
    let unreachable_success = Response {
        final_url: submission.action.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Should not happen</title>".to_owned(),
    };
    let submitter =
        SequenceSubmitter::new(vec![Err(unauthorized.clone()), Ok(unreachable_success)]);
    let retrying = RetryingFetcher::with_policy(submitter, RetryPolicy::new(2, [0, 10]));

    let outcome = retrying.submit_form(&submission);
    assert_eq!(outcome, Err(unauthorized));
    Ok(())
}
1247
// An HTTP 404 on a plain fetch must be returned as-is; the success queued
// behind it must never be reached.
#[test]
fn retrying_fetcher_does_not_retry_non_transient_failures()
-> Result<(), Box<dyn std::error::Error>> {
    let target = IndexUrl::parse("https://example.com/missing")?;
    // Only reachable if the wrapper wrongly retries after the 404.
    let unreachable_success = Response {
        final_url: target.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Should not happen</title>".to_owned(),
    };
    let not_found = FetchError::HttpStatus {
        status: 404,
        url: target.as_str().to_owned(),
    };
    let fetcher = SequenceFetcher::new(vec![Err(not_found), Ok(unreachable_success)]);
    let retrying = RetryingFetcher::with_policy(fetcher, RetryPolicy::new(2, [0, 10]));

    let outcome = retrying.fetch(&Request { url: target });
    // The expected URL is spelled out literally rather than derived from `target`.
    let expected = Err(FetchError::HttpStatus {
        status: 404,
        url: "https://example.com/missing".to_owned(),
    });
    assert_eq!(outcome, expected);
    Ok(())
}
1276
// When the inner fetcher times out and the cache already holds an entry for
// the URL, `CacheFallbackFetcher` must serve the cached response.
#[test]
fn cache_fallback_reuses_cached_response_after_transient_failure()
-> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-fallback");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/cached")?;
    let cached = Response {
        final_url: url.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Cached</title>".to_owned(),
    };
    let path = cache.store(&cached)?;
    let mut fetcher = MemoryFetcher::new();
    fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
    let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());

    // Capture the outcome without `?` so cleanup below always runs.
    let served = fallback.fetch(&Request { url });

    // Remove the on-disk fixture before asserting, as the `file_cache_*` tests
    // do, so a failing expectation does not leave files in the temp directory.
    std::fs::remove_file(path)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(served?, cached);
    Ok(())
}
1300
// A live success passes through unchanged and the cache then holds its body.
// A later HTTP 404 is returned unchanged even though that cached copy exists.
#[test]
fn cache_fallback_keeps_successes_and_non_transient_failures()
-> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-success");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/live")?;
    let live = Response {
        final_url: url.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Live</title>".to_owned(),
    };

    // Phase 1: the inner fetcher succeeds.
    let mut healthy = MemoryFetcher::new();
    healthy.insert_response(url.clone(), live.clone());
    let fallback = CacheFallbackFetcher::new(healthy, cache.clone());

    let entry_path = fallback.cache().path_for(&url);
    assert!(entry_path.starts_with(cache.root()));
    let fetched = fallback.fetch(&Request { url: url.clone() })?;
    assert_eq!(fetched, live);
    let stored_body = cache.load(&url)?.map(|entry| entry.body);
    assert_eq!(stored_body, Some("<title>Live</title>".to_owned()));

    // Phase 2: the inner fetcher reports a non-transient 404 for the same URL.
    let not_found = FetchError::HttpStatus {
        status: 404,
        url: url.as_str().to_owned(),
    };
    let mut missing = MemoryFetcher::new();
    missing.insert_error(url.clone(), not_found.clone());
    let fallback = CacheFallbackFetcher::new(missing, cache.clone());
    let outcome = fallback.fetch(&Request { url: url.clone() });
    assert_eq!(outcome, Err(not_found));

    // Remove the cache entry only if one can still be loaded, then the directory.
    if cache.load(&url)?.is_some() {
        std::fs::remove_file(cache.path_for(&url))?;
    }
    std::fs::remove_dir(&root)?;
    Ok(())
}
1347
// When a GET form submission times out and the cache already holds an entry
// for the submission's action URL, the cached response must be served.
#[test]
fn cache_fallback_reuses_cached_form_response_after_transient_failure()
-> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-form-fallback");
    let cache = FileCache::new(&root);
    let form = Form {
        name: "search".to_owned(),
        method: "GET".to_owned(),
        action: "https://example.com/search?q=index".to_owned(),
        inputs: Vec::new(),
        buttons: Vec::new(),
    };
    let submission = form.submit(None, &[])?;
    let cached = Response {
        final_url: submission.action.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Cached search</title>".to_owned(),
    };
    let path = cache.store(&cached)?;
    let mut fetcher = MemoryFetcher::new();
    fetcher.insert_error(
        submission.action.clone(),
        FetchError::Timeout { timeout_ms: 500 },
    );
    let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());

    // Capture the outcome without `?` so cleanup below always runs.
    let served = fallback.submit_form(&submission);

    // Remove the on-disk fixture before asserting, as the `file_cache_*` tests
    // do, so a failing expectation does not leave files in the temp directory.
    std::fs::remove_file(path)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(served?, cached);
    Ok(())
}
1381
// With nothing stored for the URL, the inner fetcher's timeout must be
// returned unchanged.
#[test]
fn cache_fallback_returns_transient_error_when_cache_is_missing()
-> Result<(), Box<dyn std::error::Error>> {
    let cache = FileCache::new(&temp_path("cache-miss-fallback"));
    let target = IndexUrl::parse("https://example.com/miss")?;

    let mut failing = MemoryFetcher::new();
    failing.insert_error(target.clone(), FetchError::Timeout { timeout_ms: 500 });
    let fallback = CacheFallbackFetcher::new(failing, cache);

    let outcome = fallback.fetch(&Request { url: target });
    assert_eq!(outcome, Err(FetchError::Timeout { timeout_ms: 500 }));
    Ok(())
}
1398
// When the inner fetcher times out and the cache file for the URL holds
// unparseable content, the fallback must report `FetchError::Cache` with a
// message containing "cache data is invalid".
#[test]
fn cache_fallback_reports_cache_parse_errors() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-parse-error");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/parse")?;
    // Plant a corrupt entry at exactly the path the cache would read for `url`.
    std::fs::create_dir_all(cache.root())?;
    std::fs::write(cache.path_for(&url), "broken-cache-content")?;

    let mut fetcher = MemoryFetcher::new();
    fetcher.insert_error(url.clone(), FetchError::Timeout { timeout_ms: 500 });
    let fallback = CacheFallbackFetcher::new(fetcher, cache.clone());

    let result = fallback.fetch(&Request { url: url.clone() });
    // Match by reference so `result` stays available for the failure message.
    let reported_parse_error = matches!(
        &result,
        Err(FetchError::Cache(error)) if error.contains("cache data is invalid")
    );

    // Remove the on-disk fixture before asserting, as the `file_cache_*` tests
    // do, so a failing expectation does not leave files in the temp directory.
    std::fs::remove_file(cache.path_for(&url))?;
    std::fs::remove_dir(cache.root())?;

    assert!(
        reported_parse_error,
        "expected a cache parse error, got {result:?}"
    );
    Ok(())
}
1420
// Covers two form-submission fallback outcomes after a timeout:
//   1. no cache entry     -> the timeout is returned unchanged;
//   2. corrupt cache entry -> `FetchError::Cache` mentioning "cache data is invalid".
#[test]
fn cache_fallback_form_submitter_reports_cache_errors_and_misses()
-> Result<(), Box<dyn std::error::Error>> {
    let form = Form {
        name: "search".to_owned(),
        method: "POST".to_owned(),
        action: "https://example.com/search".to_owned(),
        inputs: Vec::new(),
        buttons: Vec::new(),
    };
    let submission = form.submit(None, &[])?;

    // Case 1: cache miss. This test writes nothing to disk for this case, so
    // asserting immediately cannot strand fixture files.
    let miss_root = temp_path("cache-form-miss");
    let miss_cache = FileCache::new(&miss_root);
    let mut miss_fetcher = MemoryFetcher::new();
    miss_fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 250 });
    let miss_fallback = CacheFallbackFetcher::new(miss_fetcher, miss_cache);
    assert_eq!(
        miss_fallback.submit_form(&submission),
        Err(FetchError::Timeout { timeout_ms: 250 })
    );

    // Case 2: corrupt entry planted at the path used for the submission's action URL.
    let parse_root = temp_path("cache-form-parse");
    let parse_cache = FileCache::new(&parse_root);
    std::fs::create_dir_all(parse_cache.root())?;
    std::fs::write(
        parse_cache.path_for(&submission.action),
        "broken-cache-content",
    )?;
    let mut parse_fetcher = MemoryFetcher::new();
    parse_fetcher.insert_form_error(&submission, FetchError::Timeout { timeout_ms: 250 });
    let parse_fallback = CacheFallbackFetcher::new(parse_fetcher, parse_cache.clone());
    let parse_result = parse_fallback.submit_form(&submission);
    // Match by reference so `parse_result` stays available for the failure message.
    let reported_parse_error = matches!(
        &parse_result,
        Err(FetchError::Cache(error)) if error.contains("cache data is invalid")
    );

    // Remove the on-disk fixture before asserting, as the `file_cache_*` tests
    // do, so a failing expectation does not leave files in the temp directory.
    std::fs::remove_file(parse_cache.path_for(&submission.action))?;
    std::fs::remove_dir(parse_cache.root())?;

    assert!(
        reported_parse_error,
        "expected a cache parse error, got {parse_result:?}"
    );
    Ok(())
}
1462
// A response whose final URL also appears in its own redirect list must be
// rejected by `SecureFetcher` with `SecurityError::RedirectLoop` for that URL.
#[test]
fn secure_fetcher_rejects_redirect_loop() -> Result<(), Box<dyn std::error::Error>> {
    let start = IndexUrl::parse("https://example.com/start")?;
    let hop = IndexUrl::parse("https://example.com/hop")?;

    let mut inner = MemoryFetcher::new();
    inner.insert_redirect(
        start.clone(),
        hop.clone(),
        vec![hop.clone()],
        "<title>Loop</title>",
    );
    let secure = SecureFetcher::new(inner);

    let outcome = secure.fetch(&Request { url: start });
    let expected = Err(FetchError::Security(SecurityError::RedirectLoop { url: hop }));
    assert_eq!(outcome, expected);
    Ok(())
}
1484
// Round-trip: a stored response must load back equal to the original.
#[test]
fn file_cache_stores_and_loads_response() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/docs")?;
    let original = Response {
        final_url: url.clone(),
        redirects: Vec::new(),
        mime_type: Some("text/html".to_owned()),
        body: "<title>Docs</title>".to_owned(),
    };

    let stored_at = cache.store(&original)?;
    let loaded = cache.load(&url)?;

    // Clean up before asserting so a failure leaves nothing behind.
    std::fs::remove_file(stored_at)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(loaded, Some(original));
    Ok(())
}
1505
// Loading a URL that was never stored yields `None`, and the path computed
// for it still lies under the cache root.
#[test]
fn file_cache_returns_none_for_missing_entry() -> Result<(), Box<dyn std::error::Error>> {
    let cache = FileCache::new(temp_path("cache-missing"));
    let absent = IndexUrl::parse("https://example.com/missing")?;

    let loaded = cache.load(&absent)?;
    assert_eq!(loaded, None);

    let entry_path = cache.path_for(&absent);
    assert!(entry_path.starts_with(cache.root()));
    Ok(())
}
1515
// Round-trip of a response with no MIME type, one redirect hop, and a body
// containing a newline; all of it must survive store + load.
#[test]
fn file_cache_preserves_empty_mime_and_redirects() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-redirect");
    let cache = FileCache::new(&root);
    let redirected_from = IndexUrl::parse("http://example.com")?;
    let landed_on = IndexUrl::parse("https://example.com")?;
    let original = Response {
        final_url: landed_on.clone(),
        redirects: vec![redirected_from],
        mime_type: None,
        body: "Body\nwith newline".to_owned(),
    };

    let stored_at = cache.store(&original)?;
    let loaded = cache.load(&landed_on)?;

    // Clean up before asserting so a failure leaves nothing behind.
    std::fs::remove_file(stored_at)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(loaded, Some(original));
    Ok(())
}
1537
// Round-trip of a MIME value containing a tab, a backslash, a newline and a
// carriage return; the loaded response must equal the stored one.
#[test]
fn file_cache_escapes_header_fields() -> Result<(), Box<dyn std::error::Error>> {
    let root = temp_path("cache-escaped");
    let cache = FileCache::new(&root);
    let url = IndexUrl::parse("https://example.com/escaped")?;
    let awkward_mime = "text/html\twith\\escapes\nand\rreturns".to_owned();
    let original = Response {
        final_url: url.clone(),
        redirects: Vec::new(),
        mime_type: Some(awkward_mime),
        body: "Body".to_owned(),
    };

    let stored_at = cache.store(&original)?;
    let loaded = cache.load(&url)?;

    // Clean up before asserting so a failure leaves nothing behind.
    std::fs::remove_file(stored_at)?;
    std::fs::remove_dir(&root)?;

    assert_eq!(loaded, Some(original));
    Ok(())
}
1558
// Every record below must be rejected by `deserialize_response`.
#[test]
fn file_cache_rejects_invalid_data() {
    // NOTE(review): the per-record reasons are inferred from the literals, not
    // from the parser, which is outside this section: the first two appear to
    // lack the `index-cache-v1` first line, then an unknown header key, an
    // unparseable final URL, a record without `final-url`, and a value ending
    // in a lone backslash.
    let invalid_records = [
        "bad",
        "bad\n\nbody",
        "index-cache-v1\nunknown\tvalue\n\nbody",
        "index-cache-v1\nfinal-url\tbad-url\n\nbody",
        "index-cache-v1\nmime\ttext/html\n\nbody",
        "index-cache-v1\nfinal-url\thttps://example.com\\\n\nbody",
    ];

    for record in invalid_records {
        assert!(super::deserialize_response(record).is_err());
    }
}
1571
// A backslash followed by an unrecognised character (`x`) must produce the
// exact error text "unknown escape: x".
#[test]
fn unescape_field_rejects_unknown_escape_sequences() {
    let outcome = unescape_field("\\x");
    let expected = Err("unknown escape: x".to_owned());
    assert_eq!(outcome, expected);
}
1576
// The rendered text of each `CacheError` variant must carry the underlying detail.
#[test]
fn cache_error_messages_are_actionable() {
    // I/O variant: built via `From<std::io::Error>`, must mention the source message.
    let io_failure = std::io::Error::other("disk full");
    let io_text = super::CacheError::from(io_failure).to_string();
    assert!(io_text.contains("disk full"));

    // Parse variant: must mention the supplied description.
    let parse_text = super::CacheError::Parse("bad record".to_owned()).to_string();
    assert!(parse_text.contains("bad record"));

    // URL variant: must mention the wrapped `UrlError`'s wording.
    let wrapped = super::CacheError::Url(index_core::UrlError::MissingScheme);
    let url_text = wrapped.to_string();
    assert!(url_text.contains("missing a scheme"));
}
1588
// Two spellings of one resource (host case, explicit default port, and
// fragment differ) must map to the same cache path.
#[test]
fn file_cache_paths_use_normalized_url_keys() -> Result<(), Box<dyn std::error::Error>> {
    let cache = FileCache::new(temp_path("cache-key"));
    let noisy = IndexUrl::parse("https://EXAMPLE.com:443/docs#first")?;
    let plain = IndexUrl::parse("https://example.com/docs#second")?;

    let noisy_path = cache.path_for(&noisy);
    let plain_path = cache.path_for(&plain);
    assert_eq!(noisy_path, plain_path);
    Ok(())
}
1598
/// Returns a fresh scratch path under the system temp directory for the test `name`.
///
/// The final component is `index-http-{name}-{pid}-{nanos}-{seq}`. The
/// timestamp alone is not a reliable discriminator: it falls back to `0` when
/// the clock reads earlier than the Unix epoch, and two calls can land in the
/// same clock tick. The process id separates concurrently running test
/// binaries, and the per-process sequence number separates calls inside one
/// binary. Nothing is created on disk; callers create and remove the path
/// themselves.
fn temp_path(name: &str) -> std::path::PathBuf {
    // Monotonic per-process counter; `Relaxed` suffices because only the
    // uniqueness of the returned value matters, not ordering with other memory.
    static NEXT_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |duration| duration.as_nanos());
    let pid = std::process::id();
    let seq = NEXT_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);

    std::env::temp_dir().join(format!("index-http-{name}-{pid}-{nanos}-{seq}"))
}
1607}