Skip to main content

servo_fetch/
client.rs

1//! Async client with reusable defaults.
2
3use std::sync::Arc;
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::fetch::{self, FetchOptions, Page};
8use crate::net::sanitize_user_agent;
9use crate::visibility::VisibilityPolicy;
10
11/// Async client carrying reusable fetch defaults.
12#[derive(Debug, Clone)]
13pub struct Client {
14    inner: Arc<ClientInner>,
15}
16
17#[derive(Debug)]
18pub(crate) struct ClientInner {
19    pub(crate) timeout: Duration,
20    pub(crate) settle: Duration,
21    pub(crate) user_agent: Option<String>,
22    pub(crate) visibility: VisibilityPolicy,
23}
24
25impl ClientInner {
26    /// Unset fields fall back to these defaults; values already set on `opts` win.
27    pub(crate) fn apply_defaults(&self, mut opts: FetchOptions) -> FetchOptions {
28        opts.timeout.get_or_insert(self.timeout);
29        opts.settle.get_or_insert(self.settle);
30        opts.visibility.get_or_insert(self.visibility);
31        if let Some(ua) = &self.user_agent {
32            opts.user_agent.get_or_insert_with(|| ua.clone());
33        }
34        opts
35    }
36
37    pub(crate) fn options(&self, url: &str) -> FetchOptions {
38        self.apply_defaults(FetchOptions::new(url))
39    }
40}
41
42impl Default for Client {
43    fn default() -> Self {
44        Self::new()
45    }
46}
47
48impl Client {
49    /// Construct a client with default options.
50    #[must_use]
51    pub fn new() -> Self {
52        Self::builder().build()
53    }
54
55    /// Begin building a client with custom options.
56    pub fn builder() -> ClientBuilder {
57        ClientBuilder::default()
58    }
59
60    /// Fetch a page using the client's default options.
61    pub async fn fetch(&self, url: &str) -> Result<Page> {
62        fetch::fetch(&self.inner.options(url)).await
63    }
64
65    /// Fetch a page with explicit options.
66    pub async fn fetch_with(&self, opts: &FetchOptions) -> Result<Page> {
67        fetch::fetch(&self.inner.apply_defaults(opts.clone())).await
68    }
69
70    /// Fetch and extract readable Markdown.
71    pub async fn markdown(&self, url: &str) -> Result<String> {
72        self.fetch(url).await?.markdown_with_url(url)
73    }
74
75    /// Fetch and extract plain text (`document.body.innerText`).
76    pub async fn text(&self, url: &str) -> Result<String> {
77        Ok(self.fetch(url).await?.inner_text)
78    }
79
80    /// Fetch and extract structured JSON.
81    pub async fn extract_json(&self, url: &str) -> Result<String> {
82        self.fetch(url).await?.extract_json_with_url(url)
83    }
84
85    /// Capture a PNG screenshot of the page.
86    pub async fn screenshot(&self, url: &str, opts: &ScreenshotOptions) -> Result<Vec<u8>> {
87        let fopts = self.inner.apply_defaults(FetchOptions::screenshot(url, opts.full_page));
88        let page = fetch::fetch(&fopts).await?;
89        page.screenshot_png()
90            .map(<[u8]>::to_vec)
91            .ok_or_else(|| Error::screenshot(anyhow::anyhow!("screenshot returned no data"), Some(url.to_string())))
92    }
93
94    /// Execute a JavaScript expression after page load and return the result.
95    pub async fn execute_js(&self, url: &str, expression: impl Into<String>) -> Result<String> {
96        let fopts = self.inner.apply_defaults(FetchOptions::javascript(url, expression));
97        let page = fetch::fetch(&fopts).await?;
98        page.js_result
99            .ok_or_else(|| Error::javascript(anyhow::anyhow!("execute_js returned no result"), Some(url.to_string())))
100    }
101}
102
/// Options for [`Client::screenshot`].
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot build
/// it with a struct literal. Start from [`Default::default`] and chain the
/// setter instead:
///
/// ```ignore
/// let opts = ScreenshotOptions::default().full_page(true);
/// ```
#[derive(Debug, Default, Clone)]
#[non_exhaustive]
pub struct ScreenshotOptions {
    /// Capture the full scrollable page (rather than only the viewport).
    pub full_page: bool,
}

impl ScreenshotOptions {
    /// Set whether the full scrollable page is captured instead of only the
    /// viewport, returning the updated options for chaining.
    #[must_use]
    pub fn full_page(mut self, full_page: bool) -> Self {
        self.full_page = full_page;
        self
    }
}
110
111/// Builder for [`Client`].
112#[must_use = "ClientBuilder does nothing until .build() is called"]
113#[derive(Debug, Default)]
114pub struct ClientBuilder {
115    timeout: Option<Duration>,
116    settle: Option<Duration>,
117    user_agent: Option<String>,
118    visibility: Option<VisibilityPolicy>,
119}
120
121impl ClientBuilder {
122    /// Default page-load timeout (default 30s).
123    pub fn timeout(mut self, timeout: Duration) -> Self {
124        self.timeout = Some(timeout);
125        self
126    }
127
128    /// Default settle wait after the load event (default 0).
129    pub fn settle(mut self, settle: Duration) -> Self {
130        self.settle = Some(settle);
131        self
132    }
133
134    /// Default User-Agent header.
135    pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
136        self.user_agent = Some(sanitize_user_agent(ua.into()));
137        self
138    }
139
140    /// Default visibility policy applied during extraction.
141    pub fn visibility(mut self, policy: VisibilityPolicy) -> Self {
142        self.visibility = Some(policy);
143        self
144    }
145
146    /// Build the [`Client`].
147    #[must_use]
148    pub fn build(self) -> Client {
149        Client {
150            inner: Arc::new(self.build_inner()),
151        }
152    }
153
154    pub(crate) fn build_inner(self) -> ClientInner {
155        ClientInner {
156            timeout: self.timeout.unwrap_or(FetchOptions::DEFAULT_TIMEOUT),
157            settle: self.settle.unwrap_or_default(),
158            user_agent: self.user_agent,
159            visibility: self.visibility.unwrap_or_default(),
160        }
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    // --- Builder / construction -------------------------------------------

    #[test]
    fn client_default_uses_30s_timeout() {
        let client = Client::default();
        assert_eq!(client.inner.timeout, FetchOptions::DEFAULT_TIMEOUT);
    }

    #[test]
    fn client_builder_sets_timeout() {
        let expected = Duration::from_secs(60);
        let client = Client::builder().timeout(expected).build();
        assert_eq!(client.inner.timeout, expected);
    }

    #[test]
    fn client_builder_sets_settle() {
        let expected = Duration::from_millis(500);
        let client = Client::builder().settle(expected).build();
        assert_eq!(client.inner.settle, expected);
    }

    #[test]
    fn client_builder_sanitizes_user_agent() {
        // CR and LF must not survive into the stored header value.
        let client = Client::builder().user_agent("Bot\r\nX-Evil: yes").build();
        let stored = client.inner.user_agent.as_deref();
        assert_eq!(stored, Some("Bot  X-Evil: yes"));
    }

    // --- Default merging --------------------------------------------------

    #[test]
    fn client_options_propagates_defaults() {
        let builder = Client::builder()
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(500))
            .user_agent("MyBot");
        let client = builder.build();

        let resolved = client.inner.options("https://example.com");

        assert_eq!(resolved.timeout, Some(Duration::from_secs(60)));
        assert_eq!(resolved.settle, Some(Duration::from_millis(500)));
        assert_eq!(resolved.user_agent.as_deref(), Some("MyBot"));
    }

    #[test]
    fn client_apply_defaults_caller_value_wins() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .user_agent("ClientBot")
            .build();
        let caller_opts = FetchOptions::new("https://example.com")
            .timeout(Duration::from_secs(10))
            .user_agent("UserBot");

        let resolved = client.inner.apply_defaults(caller_opts);

        // Values set explicitly by the caller take precedence over the client's.
        assert_eq!(resolved.timeout, Some(Duration::from_secs(10)));
        assert_eq!(resolved.user_agent.as_deref(), Some("UserBot"));
    }

    #[test]
    fn client_apply_defaults_fills_unset_fields() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(750))
            .user_agent("ClientBot")
            .visibility(VisibilityPolicy::off())
            .build();
        let caller_opts = FetchOptions::new("https://example.com");

        let resolved = client.inner.apply_defaults(caller_opts);

        // Every field the caller left as `None` is taken from the client.
        assert_eq!(resolved.timeout, Some(Duration::from_secs(60)));
        assert_eq!(resolved.settle, Some(Duration::from_millis(750)));
        assert_eq!(resolved.user_agent.as_deref(), Some("ClientBot"));
        assert_eq!(resolved.visibility, Some(VisibilityPolicy::off()));
    }

    // --- Type-level properties --------------------------------------------

    #[test]
    fn client_clone_shares_inner() {
        let original = Client::new();
        let copy = original.clone();
        assert!(Arc::ptr_eq(&original.inner, &copy.inner));
    }

    #[test]
    fn screenshot_options_default_is_viewport() {
        let opts = ScreenshotOptions::default();
        assert!(!opts.full_page);
    }

    #[test]
    fn assert_send_sync() {
        fn check<T: Send + Sync>() {}
        check::<Client>();
        check::<ClientBuilder>();
        check::<ScreenshotOptions>();
    }

    #[test]
    fn client_builder_sets_visibility() {
        let client = Client::builder().visibility(VisibilityPolicy::off()).build();
        assert_eq!(client.inner.visibility, VisibilityPolicy::off());
    }

    // --- Fetch error paths ------------------------------------------------

    #[tokio::test]
    async fn client_fetch_invalid_url_returns_invalid_url_error() {
        let outcome = Client::new().fetch("not a url").await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }), "got: {err:?}");
    }

    #[tokio::test]
    async fn client_fetch_private_address_is_rejected() {
        let outcome = Client::new().fetch("http://127.0.0.1/").await;
        let err = outcome.unwrap_err();
        assert!(err.is_network(), "got: {err:?}");
    }
}