Skip to main content

servo_fetch/blocking/
client.rs

1//! Blocking client mirror of [`crate::Client`].
2
3use std::sync::Arc;
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::fetch::{FetchOptions, Page, fetch_blocking};
8use crate::visibility::VisibilityPolicy;
9use crate::{ScreenshotOptions, client};
10
11/// Blocking client carrying reusable fetch defaults.
12#[derive(Debug, Clone)]
13pub struct Client {
14    inner: Arc<client::ClientInner>,
15}
16
17impl Default for Client {
18    fn default() -> Self {
19        Self::new()
20    }
21}
22
23impl Client {
24    /// Construct a client with default options.
25    #[must_use]
26    pub fn new() -> Self {
27        Self::builder().build()
28    }
29
30    /// Begin building a client with custom options.
31    pub fn builder() -> ClientBuilder {
32        ClientBuilder::default()
33    }
34
35    /// Fetch a page using the client's default options.
36    pub fn fetch(&self, url: &str) -> Result<Page> {
37        fetch_blocking(&self.inner.options(url))
38    }
39
40    /// Fetch a page with explicit options.
41    pub fn fetch_with(&self, opts: &FetchOptions) -> Result<Page> {
42        fetch_blocking(&self.inner.apply_defaults(opts.clone()))
43    }
44
45    /// Fetch and extract readable Markdown.
46    pub fn markdown(&self, url: &str) -> Result<String> {
47        self.fetch(url)?.markdown_with_url(url)
48    }
49
50    /// Fetch and extract plain text (`document.body.innerText`).
51    pub fn text(&self, url: &str) -> Result<String> {
52        Ok(self.fetch(url)?.inner_text)
53    }
54
55    /// Fetch and extract structured JSON.
56    pub fn extract_json(&self, url: &str) -> Result<String> {
57        self.fetch(url)?.extract_json_with_url(url)
58    }
59
60    /// Capture a PNG screenshot of the page.
61    pub fn screenshot(&self, url: &str, opts: &ScreenshotOptions) -> Result<Vec<u8>> {
62        let fopts = self.inner.apply_defaults(FetchOptions::screenshot(url, opts.full_page));
63        let page = fetch_blocking(&fopts)?;
64        page.screenshot_png()
65            .map(<[u8]>::to_vec)
66            .ok_or_else(|| Error::screenshot(anyhow::anyhow!("screenshot returned no data"), Some(url.to_string())))
67    }
68
69    /// Execute a JavaScript expression after page load and return the result.
70    pub fn execute_js(&self, url: &str, expression: impl Into<String>) -> Result<String> {
71        let fopts = self.inner.apply_defaults(FetchOptions::javascript(url, expression));
72        let page = fetch_blocking(&fopts)?;
73        page.js_result
74            .ok_or_else(|| Error::javascript(anyhow::anyhow!("execute_js returned no result"), Some(url.to_string())))
75    }
76}
77
78/// Builder for [`Client`].
79#[must_use = "ClientBuilder does nothing until .build() is called"]
80#[derive(Debug, Default)]
81pub struct ClientBuilder {
82    inner: client::ClientBuilder,
83}
84
85impl ClientBuilder {
86    /// Default page-load timeout (default 30s).
87    pub fn timeout(mut self, timeout: Duration) -> Self {
88        self.inner = self.inner.timeout(timeout);
89        self
90    }
91
92    /// Default settle wait after the load event (default 0).
93    pub fn settle(mut self, settle: Duration) -> Self {
94        self.inner = self.inner.settle(settle);
95        self
96    }
97
98    /// Default User-Agent header.
99    pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
100        self.inner = self.inner.user_agent(ua);
101        self
102    }
103
104    /// Default visibility policy applied during extraction.
105    pub fn visibility(mut self, policy: VisibilityPolicy) -> Self {
106        self.inner = self.inner.visibility(policy);
107        self
108    }
109
110    /// Build the [`Client`].
111    #[must_use]
112    pub fn build(self) -> Client {
113        Client {
114            inner: Arc::new(self.inner.build_inner()),
115        }
116    }
117}
118
#[cfg(test)]
mod tests {
    use super::*;

    /// A client built without overrides reports a 30-second timeout.
    #[test]
    fn client_default_uses_30s_timeout() {
        let client = Client::default();
        assert_eq!(client.inner.timeout, Duration::from_secs(30));
    }

    /// A timeout given to the builder reaches the built client.
    #[test]
    fn client_builder_sets_timeout() {
        let wanted = Duration::from_secs(60);
        let client = Client::builder().timeout(wanted).build();
        assert_eq!(client.inner.timeout, wanted);
    }

    /// A settle duration given to the builder reaches the built client.
    #[test]
    fn client_builder_sets_settle() {
        let wanted = Duration::from_millis(500);
        let client = Client::builder().settle(wanted).build();
        assert_eq!(client.inner.settle, wanted);
    }

    /// A visibility policy given to the builder reaches the built client.
    #[test]
    fn client_builder_sets_visibility() {
        let policy = VisibilityPolicy::off();
        let client = Client::builder().visibility(policy).build();
        assert_eq!(client.inner.visibility, VisibilityPolicy::off());
    }

    /// CR and LF in a User-Agent come out as spaces, so the value cannot
    /// smuggle in an extra header line.
    #[test]
    fn client_builder_sanitizes_user_agent() {
        let raw = "Bot\r\nX-Evil: yes";
        let client = Client::builder().user_agent(raw).build();
        assert_eq!(client.inner.user_agent.as_deref(), Some("Bot  X-Evil: yes"));
    }

    /// Cloning a client yields a handle to the very same inner allocation.
    #[test]
    fn client_clone_shares_inner() {
        let original = Client::new();
        let copy = original.clone();
        assert!(Arc::ptr_eq(&original.inner, &copy.inner));
    }

    /// Compile-time check that both public types are `Send + Sync`.
    #[test]
    fn assert_send_sync() {
        fn require_send_sync<T: Send + Sync>() {}
        require_send_sync::<Client>();
        require_send_sync::<ClientBuilder>();
    }

    /// A string that is not a URL is reported as `Error::InvalidUrl`.
    #[test]
    fn fetch_invalid_url_returns_invalid_url_error() {
        let outcome = Client::new().fetch("not a url");
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }));
    }

    /// Fetching a loopback address fails with an error classified as network.
    #[test]
    fn fetch_private_address_is_rejected() {
        let outcome = Client::new().fetch("http://127.0.0.1/");
        let err = outcome.unwrap_err();
        assert!(err.is_network(), "got: {err:?}");
    }
}