Skip to main content

servo_fetch/blocking/
client.rs

1//! Blocking client mirror of [`crate::Client`].
2
3use std::sync::Arc;
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::fetch::{FetchOptions, Page, fetch_blocking};
8use crate::visibility::VisibilityPolicy;
9use crate::{ScreenshotOptions, client};
10
11/// Blocking client carrying reusable fetch defaults.
12#[derive(Debug, Clone)]
13pub struct Client {
14    inner: Arc<client::ClientInner>,
15}
16
17impl Default for Client {
18    fn default() -> Self {
19        Self::new()
20    }
21}
22
23impl Client {
24    /// Construct a client with default options.
25    #[must_use]
26    pub fn new() -> Self {
27        Self::builder().build()
28    }
29
30    /// Begin building a client with custom options.
31    pub fn builder() -> ClientBuilder {
32        ClientBuilder::default()
33    }
34
35    /// Fetch a page using the client's default options.
36    pub fn fetch(&self, url: &str) -> Result<Page> {
37        fetch_blocking(&self.options(url))
38    }
39
40    /// Fetch a page with explicit options.
41    ///
42    /// Unset fields on `opts` fall back to the client's defaults; explicit
43    /// values on `opts` always win.
44    pub fn fetch_with(&self, opts: &FetchOptions) -> Result<Page> {
45        fetch_blocking(&self.apply_defaults(opts))
46    }
47
48    /// Fetch and extract readable Markdown.
49    pub fn markdown(&self, url: &str) -> Result<String> {
50        self.fetch(url)?.markdown_with_url(url)
51    }
52
53    /// Fetch and extract plain text (`document.body.innerText`).
54    pub fn text(&self, url: &str) -> Result<String> {
55        Ok(self.fetch(url)?.inner_text)
56    }
57
58    /// Fetch and extract structured JSON.
59    pub fn extract_json(&self, url: &str) -> Result<String> {
60        self.fetch(url)?.extract_json_with_url(url)
61    }
62
63    /// Capture a PNG screenshot of the page.
64    pub fn screenshot(&self, url: &str, opts: &ScreenshotOptions) -> Result<Vec<u8>> {
65        let fopts = self.apply_defaults(&FetchOptions::screenshot(url, opts.full_page));
66        let page = fetch_blocking(&fopts)?;
67        page.screenshot_png()
68            .map(<[u8]>::to_vec)
69            .ok_or_else(|| Error::screenshot(anyhow::anyhow!("screenshot returned no data"), Some(url.to_string())))
70    }
71
72    /// Execute a JavaScript expression after page load and return the result.
73    pub fn execute_js(&self, url: &str, expression: impl Into<String>) -> Result<String> {
74        let fopts = self.apply_defaults(&FetchOptions::javascript(url, expression));
75        let page = fetch_blocking(&fopts)?;
76        page.js_result
77            .ok_or_else(|| Error::javascript(anyhow::anyhow!("execute_js returned no result"), Some(url.to_string())))
78    }
79
80    fn apply_defaults(&self, opts: &FetchOptions) -> FetchOptions {
81        let mut opts = opts.clone();
82        if opts.timeout.is_none() {
83            opts.timeout = Some(self.inner.timeout);
84        }
85        if opts.settle.is_none() {
86            opts.settle = Some(self.inner.settle);
87        }
88        if opts.visibility.is_none() {
89            opts.visibility = Some(self.inner.visibility);
90        }
91        if opts.user_agent.is_none()
92            && let Some(ua) = self.inner.user_agent.as_deref()
93        {
94            opts.user_agent = Some(ua.to_owned());
95        }
96        opts
97    }
98
99    fn options(&self, url: &str) -> FetchOptions {
100        self.apply_defaults(&FetchOptions::new(url))
101    }
102}
103
104/// Builder for [`Client`].
105#[must_use = "ClientBuilder does nothing until .build() is called"]
106#[derive(Debug, Default)]
107pub struct ClientBuilder {
108    inner: client::ClientBuilder,
109}
110
111impl ClientBuilder {
112    /// Default page-load timeout (default 30s).
113    pub fn timeout(mut self, timeout: Duration) -> Self {
114        self.inner = self.inner.timeout(timeout);
115        self
116    }
117
118    /// Default settle wait after the load event (default 0).
119    pub fn settle(mut self, settle: Duration) -> Self {
120        self.inner = self.inner.settle(settle);
121        self
122    }
123
124    /// Default User-Agent header.
125    pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
126        self.inner = self.inner.user_agent(ua);
127        self
128    }
129
130    /// Default visibility policy applied during extraction.
131    pub fn visibility(mut self, policy: VisibilityPolicy) -> Self {
132        self.inner = self.inner.visibility(policy);
133        self
134    }
135
136    /// Build the [`Client`].
137    #[must_use]
138    pub fn build(self) -> Client {
139        Client {
140            inner: Arc::new(self.inner.build_inner()),
141        }
142    }
143}
144
#[cfg(test)]
mod tests {
    use super::*;

    /// A default client carries a 30-second timeout.
    #[test]
    fn client_default_uses_30s_timeout() {
        let client = Client::default();
        assert_eq!(client.inner.timeout, Duration::from_secs(30));
    }

    /// A timeout given to the builder is stored on the built client.
    #[test]
    fn client_builder_sets_timeout() {
        let builder = Client::builder().timeout(Duration::from_secs(60));
        let built = builder.build();
        assert_eq!(built.inner.timeout, Duration::from_secs(60));
    }

    /// A settle wait given to the builder is stored on the built client.
    #[test]
    fn client_builder_sets_settle() {
        let builder = Client::builder().settle(Duration::from_millis(500));
        let built = builder.build();
        assert_eq!(built.inner.settle, Duration::from_millis(500));
    }

    /// A visibility policy given to the builder is stored on the built client.
    #[test]
    fn client_builder_sets_visibility() {
        let builder = Client::builder().visibility(VisibilityPolicy::off());
        let built = builder.build();
        assert_eq!(built.inner.visibility, VisibilityPolicy::off());
    }

    /// CR and LF in a User-Agent are each replaced by a space.
    #[test]
    fn client_builder_sanitizes_user_agent() {
        let builder = Client::builder().user_agent("Bot\r\nX-Evil: yes");
        let built = builder.build();
        assert_eq!(built.inner.user_agent.as_deref(), Some("Bot  X-Evil: yes"));
    }

    /// A clone points at the same shared configuration as its source.
    #[test]
    fn client_clone_shares_inner() {
        let original = Client::new();
        let duplicate = original.clone();
        assert!(Arc::ptr_eq(&original.inner, &duplicate.inner));
    }

    /// Compile-time check: both public types are `Send + Sync`.
    #[test]
    fn assert_send_sync() {
        fn requires_send_sync<T: Send + Sync>() {}
        requires_send_sync::<Client>();
        requires_send_sync::<ClientBuilder>();
    }

    /// A string that is not a URL yields `Error::InvalidUrl`.
    #[test]
    fn fetch_invalid_url_returns_invalid_url_error() {
        let outcome = Client::new().fetch("not a url");
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }));
    }

    /// A loopback address is refused with a network error.
    #[test]
    fn fetch_private_address_is_rejected() {
        let outcome = Client::new().fetch("http://127.0.0.1/");
        let err = outcome.unwrap_err();
        assert!(err.is_network(), "got: {err:?}");
    }
}