// rover/fetcher/mod.rs

1//! HTTP fetching, charset detection, SSRF enforcement.
2
3pub mod cache_control;
4pub mod cached;
5pub mod canonical;
6pub mod challenge;
7pub mod charset;
8pub mod client;
9pub mod dns;
10pub mod fetch;
11pub mod har;
12#[cfg(feature = "headless")]
13pub mod headless;
14pub mod ssrf;
15pub mod ttl;
16
17pub mod concurrency;
18pub mod rate_limit;
19pub mod retry;
20pub mod robots;
21
22pub use cached::{
23    CacheStatus, CachedFetch, ExtractResult, FetchOptions, HeadlessMode, fetch_with_cache,
24};
25pub use fetch::FetchedPage;
26
27use thiserror::Error;
28
29#[derive(Debug, Error)]
30pub enum FetcherError {
31    #[error("ssrf violation: {0}")]
32    Ssrf(#[from] ssrf::SsrfError),
33
34    #[error("http error: {0}")]
35    Http(#[from] reqwest::Error),
36
37    #[error("invalid url: {0}")]
38    Url(#[from] url::ParseError),
39
40    #[error("dns lookup failed for {host}: {source}")]
41    Dns {
42        host: String,
43        source: std::io::Error,
44    },
45
46    #[error("response decoding failed")]
47    Decode,
48
49    #[error("HTTP {status} from {url}")]
50    Status { status: u16, url: String },
51
52    #[error(
53        "{provider} bot-protection challenge blocked {url}; the site serves a JavaScript challenge that a plain HTTP fetch cannot solve. Enable headless rendering (build with `--features headless` and install a Chrome/Chromium browser) so Rover can render it in a real browser."
54    )]
55    BotChallenge { url: String, provider: String },
56
57    #[error("storage error: {0}")]
58    Storage(#[from] crate::storage::StorageError),
59
60    #[error("extractor error: {0}")]
61    Extract(#[from] crate::extractor::ExtractorError),
62
63    #[error("retries exhausted after {attempts} attempts; last error: {last}")]
64    RetryExhausted {
65        attempts: u8,
66        last: Box<FetcherError>,
67    },
68
69    #[error("rate limited: server requested wait of {retry_after_secs}s")]
70    RateLimited { retry_after_secs: u64 },
71
72    #[error("robots.txt disallows {url} for user-agent {ua}")]
73    RobotsDisallowed { url: String, ua: String },
74
75    #[error("robots.txt fetch failed for {host}: {source}")]
76    RobotsFetchFailed {
77        host: String,
78        #[source]
79        source: Box<FetcherError>,
80    },
81
82    #[error("fetch deferred to retry task {task_id}")]
83    Deferred { task_id: String },
84
85    #[error("headless feature not compiled into this binary")]
86    HeadlessFeatureNotCompiled,
87
88    #[error("headless renderer is not wired into this fetcher")]
89    HeadlessRendererUnavailable,
90
91    #[cfg(feature = "headless")]
92    #[error("headless render failed: {0}")]
93    Headless(#[from] crate::fetcher::headless::HeadlessError),
94}