Skip to main content

servo_fetch/
client.rs

1//! Async client with reusable defaults.
2
3use std::sync::Arc;
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::fetch::{self, FetchOptions, Page};
8use crate::net::sanitize_user_agent;
9use crate::visibility::VisibilityPolicy;
10
11/// Async client carrying reusable fetch defaults.
12#[derive(Debug, Clone)]
13pub struct Client {
14    inner: Arc<ClientInner>,
15}
16
17#[derive(Debug)]
18pub(crate) struct ClientInner {
19    pub(crate) timeout: Duration,
20    pub(crate) settle: Duration,
21    pub(crate) user_agent: Option<String>,
22    pub(crate) visibility: VisibilityPolicy,
23}
24
25impl Default for Client {
26    fn default() -> Self {
27        Self::new()
28    }
29}
30
31impl Client {
32    /// Construct a client with default options.
33    #[must_use]
34    pub fn new() -> Self {
35        Self::builder().build()
36    }
37
38    /// Begin building a client with custom options.
39    pub fn builder() -> ClientBuilder {
40        ClientBuilder::default()
41    }
42
43    /// Fetch a page using the client's default options.
44    pub async fn fetch(&self, url: &str) -> Result<Page> {
45        fetch::fetch(&self.options(url)).await
46    }
47
48    /// Fetch a page with explicit options.
49    ///
50    /// Unset fields on `opts` fall back to the client's defaults; explicit
51    /// values on `opts` always win.
52    pub async fn fetch_with(&self, opts: &FetchOptions) -> Result<Page> {
53        fetch::fetch(&self.apply_defaults(opts)).await
54    }
55
56    /// Fetch and extract readable Markdown.
57    pub async fn markdown(&self, url: &str) -> Result<String> {
58        self.fetch(url).await?.markdown_with_url(url)
59    }
60
61    /// Fetch and extract plain text (`document.body.innerText`).
62    pub async fn text(&self, url: &str) -> Result<String> {
63        Ok(self.fetch(url).await?.inner_text)
64    }
65
66    /// Fetch and extract structured JSON.
67    pub async fn extract_json(&self, url: &str) -> Result<String> {
68        self.fetch(url).await?.extract_json_with_url(url)
69    }
70
71    /// Capture a PNG screenshot of the page.
72    pub async fn screenshot(&self, url: &str, opts: &ScreenshotOptions) -> Result<Vec<u8>> {
73        let fopts = self.apply_defaults(&FetchOptions::screenshot(url, opts.full_page));
74        let page = fetch::fetch(&fopts).await?;
75        page.screenshot_png()
76            .map(<[u8]>::to_vec)
77            .ok_or_else(|| Error::screenshot(anyhow::anyhow!("screenshot returned no data"), Some(url.to_string())))
78    }
79
80    /// Execute a JavaScript expression after page load and return the result.
81    pub async fn execute_js(&self, url: &str, expression: impl Into<String>) -> Result<String> {
82        let fopts = self.apply_defaults(&FetchOptions::javascript(url, expression));
83        let page = fetch::fetch(&fopts).await?;
84        page.js_result
85            .ok_or_else(|| Error::javascript(anyhow::anyhow!("execute_js returned no result"), Some(url.to_string())))
86    }
87
88    fn apply_defaults(&self, opts: &FetchOptions) -> FetchOptions {
89        let mut opts = opts.clone();
90        if opts.timeout.is_none() {
91            opts.timeout = Some(self.inner.timeout);
92        }
93        if opts.settle.is_none() {
94            opts.settle = Some(self.inner.settle);
95        }
96        if opts.visibility.is_none() {
97            opts.visibility = Some(self.inner.visibility);
98        }
99        if opts.user_agent.is_none()
100            && let Some(ua) = self.inner.user_agent.as_deref()
101        {
102            opts.user_agent = Some(ua.to_owned());
103        }
104        opts
105    }
106
107    fn options(&self, url: &str) -> FetchOptions {
108        self.apply_defaults(&FetchOptions::new(url))
109    }
110}
111
/// Settings accepted by [`Client::screenshot`].
///
/// The default value captures only the viewport.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct ScreenshotOptions {
    /// When `true`, capture the whole scrollable page instead of just the viewport.
    pub full_page: bool,
}
119
120/// Builder for [`Client`].
121#[must_use = "ClientBuilder does nothing until .build() is called"]
122#[derive(Debug, Default)]
123pub struct ClientBuilder {
124    timeout: Option<Duration>,
125    settle: Option<Duration>,
126    user_agent: Option<String>,
127    visibility: Option<VisibilityPolicy>,
128}
129
130impl ClientBuilder {
131    /// Default page-load timeout (default 30s).
132    pub fn timeout(mut self, timeout: Duration) -> Self {
133        self.timeout = Some(timeout);
134        self
135    }
136
137    /// Default settle wait after the load event (default 0).
138    pub fn settle(mut self, settle: Duration) -> Self {
139        self.settle = Some(settle);
140        self
141    }
142
143    /// Default User-Agent header.
144    pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
145        self.user_agent = Some(sanitize_user_agent(ua.into()));
146        self
147    }
148
149    /// Default visibility policy applied during extraction.
150    pub fn visibility(mut self, policy: VisibilityPolicy) -> Self {
151        self.visibility = Some(policy);
152        self
153    }
154
155    /// Build the [`Client`].
156    #[must_use]
157    pub fn build(self) -> Client {
158        Client {
159            inner: Arc::new(self.build_inner()),
160        }
161    }
162
163    pub(crate) fn build_inner(self) -> ClientInner {
164        ClientInner {
165            timeout: self.timeout.unwrap_or(FetchOptions::DEFAULT_TIMEOUT),
166            settle: self.settle.unwrap_or_default(),
167            user_agent: self.user_agent,
168            visibility: self.visibility.unwrap_or_default(),
169        }
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// URL used by tests that only inspect option merging and never hit the network.
    const EXAMPLE_URL: &str = "https://example.com";

    // --- builder / construction -------------------------------------------

    #[test]
    fn client_default_uses_30s_timeout() {
        let client = Client::default();
        assert_eq!(client.inner.timeout, FetchOptions::DEFAULT_TIMEOUT);
    }

    #[test]
    fn client_builder_sets_timeout() {
        let expected = Duration::from_secs(60);
        let client = Client::builder().timeout(expected).build();
        assert_eq!(client.inner.timeout, expected);
    }

    #[test]
    fn client_builder_sets_settle() {
        let expected = Duration::from_millis(500);
        let client = Client::builder().settle(expected).build();
        assert_eq!(client.inner.settle, expected);
    }

    #[test]
    fn client_builder_sanitizes_user_agent() {
        // CR and LF must not survive, otherwise a caller could inject headers.
        let client = Client::builder().user_agent("Bot\r\nX-Evil: yes").build();
        let stored = client.inner.user_agent.as_deref();
        assert_eq!(stored, Some("Bot  X-Evil: yes"));
    }

    // --- default merging --------------------------------------------------

    #[test]
    fn client_options_propagates_defaults() {
        let timeout = Duration::from_secs(60);
        let settle = Duration::from_millis(500);
        let client = Client::builder()
            .timeout(timeout)
            .settle(settle)
            .user_agent("MyBot")
            .build();

        let opts = client.options(EXAMPLE_URL);

        assert_eq!(opts.timeout, Some(timeout));
        assert_eq!(opts.settle, Some(settle));
        assert_eq!(opts.user_agent.as_deref(), Some("MyBot"));
    }

    #[test]
    fn client_apply_defaults_caller_value_wins() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .user_agent("ClientBot")
            .build();
        let caller_opts = FetchOptions::new(EXAMPLE_URL)
            .timeout(Duration::from_secs(10))
            .user_agent("UserBot");

        let merged = client.apply_defaults(&caller_opts);

        // Explicit caller values take precedence over client defaults.
        assert_eq!(merged.timeout, Some(Duration::from_secs(10)));
        assert_eq!(merged.user_agent.as_deref(), Some("UserBot"));
    }

    #[test]
    fn client_apply_defaults_fills_unset_fields() {
        let timeout = Duration::from_secs(60);
        let settle = Duration::from_millis(750);
        let client = Client::builder()
            .timeout(timeout)
            .settle(settle)
            .user_agent("ClientBot")
            .visibility(VisibilityPolicy::off())
            .build();
        let caller_opts = FetchOptions::new(EXAMPLE_URL);

        let merged = client.apply_defaults(&caller_opts);

        // Every field the caller left unset is taken from the client.
        assert_eq!(merged.timeout, Some(timeout));
        assert_eq!(merged.settle, Some(settle));
        assert_eq!(merged.user_agent.as_deref(), Some("ClientBot"));
        assert_eq!(merged.visibility, Some(VisibilityPolicy::off()));
    }

    // --- type-level properties --------------------------------------------

    #[test]
    fn client_clone_shares_inner() {
        let original = Client::new();
        let duplicate = original.clone();
        assert!(Arc::ptr_eq(&original.inner, &duplicate.inner));
    }

    #[test]
    fn screenshot_options_default_is_viewport() {
        let opts = ScreenshotOptions::default();
        assert!(!opts.full_page);
    }

    #[test]
    fn assert_send_sync() {
        fn check<T>()
        where
            T: Send + Sync,
        {
        }
        check::<Client>();
        check::<ClientBuilder>();
        check::<ScreenshotOptions>();
    }

    #[test]
    fn client_builder_sets_visibility() {
        let client = Client::builder().visibility(VisibilityPolicy::off()).build();
        assert_eq!(client.inner.visibility, VisibilityPolicy::off());
    }

    // --- fetch error paths ------------------------------------------------

    #[tokio::test]
    async fn client_fetch_invalid_url_returns_invalid_url_error() {
        let outcome = Client::new().fetch("not a url").await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }), "got: {err:?}");
    }

    #[tokio::test]
    async fn client_fetch_private_address_is_rejected() {
        let outcome = Client::new().fetch("http://127.0.0.1/").await;
        let err = outcome.unwrap_err();
        assert!(err.is_network(), "got: {err:?}");
    }
}