1use std::sync::Arc;
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::fetch::{self, FetchOptions, Page};
8use crate::net::sanitize_user_agent;
9use crate::visibility::VisibilityPolicy;
10
11#[derive(Debug, Clone)]
13pub struct Client {
14 inner: Arc<ClientInner>,
15}
16
17#[derive(Debug)]
18pub(crate) struct ClientInner {
19 pub(crate) timeout: Duration,
20 pub(crate) settle: Duration,
21 pub(crate) user_agent: Option<String>,
22 pub(crate) visibility: VisibilityPolicy,
23}
24
25impl Default for Client {
26 fn default() -> Self {
27 Self::new()
28 }
29}
30
31impl Client {
32 #[must_use]
34 pub fn new() -> Self {
35 Self::builder().build()
36 }
37
38 pub fn builder() -> ClientBuilder {
40 ClientBuilder::default()
41 }
42
43 pub async fn fetch(&self, url: &str) -> Result<Page> {
45 fetch::fetch(&self.options(url)).await
46 }
47
48 pub async fn fetch_with(&self, opts: &FetchOptions) -> Result<Page> {
53 fetch::fetch(&self.apply_defaults(opts)).await
54 }
55
56 pub async fn markdown(&self, url: &str) -> Result<String> {
58 self.fetch(url).await?.markdown_with_url(url)
59 }
60
61 pub async fn text(&self, url: &str) -> Result<String> {
63 Ok(self.fetch(url).await?.inner_text)
64 }
65
66 pub async fn extract_json(&self, url: &str) -> Result<String> {
68 self.fetch(url).await?.extract_json_with_url(url)
69 }
70
71 pub async fn screenshot(&self, url: &str, opts: &ScreenshotOptions) -> Result<Vec<u8>> {
73 let fopts = self.apply_defaults(&FetchOptions::screenshot(url, opts.full_page));
74 let page = fetch::fetch(&fopts).await?;
75 page.screenshot_png()
76 .map(<[u8]>::to_vec)
77 .ok_or_else(|| Error::screenshot(anyhow::anyhow!("screenshot returned no data"), Some(url.to_string())))
78 }
79
80 pub async fn execute_js(&self, url: &str, expression: impl Into<String>) -> Result<String> {
82 let fopts = self.apply_defaults(&FetchOptions::javascript(url, expression));
83 let page = fetch::fetch(&fopts).await?;
84 page.js_result
85 .ok_or_else(|| Error::javascript(anyhow::anyhow!("execute_js returned no result"), Some(url.to_string())))
86 }
87
88 fn apply_defaults(&self, opts: &FetchOptions) -> FetchOptions {
89 let mut opts = opts.clone();
90 if opts.timeout.is_none() {
91 opts.timeout = Some(self.inner.timeout);
92 }
93 if opts.settle.is_none() {
94 opts.settle = Some(self.inner.settle);
95 }
96 if opts.visibility.is_none() {
97 opts.visibility = Some(self.inner.visibility);
98 }
99 if opts.user_agent.is_none()
100 && let Some(ua) = self.inner.user_agent.as_deref()
101 {
102 opts.user_agent = Some(ua.to_owned());
103 }
104 opts
105 }
106
107 fn options(&self, url: &str) -> FetchOptions {
108 self.apply_defaults(&FetchOptions::new(url))
109 }
110}
111
/// Options accepted by `Client::screenshot`.
///
/// The type is `#[non_exhaustive]`, so code outside this crate cannot use a
/// struct literal; start from [`Default`] and set the fields of interest.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct ScreenshotOptions {
    /// Passed as the second argument of `FetchOptions::screenshot`; `false`
    /// by default.
    ///
    /// NOTE(review): presumably `true` captures the whole page rather than
    /// just the viewport — confirm against the fetch module.
    pub full_page: bool,
}
119
120#[must_use = "ClientBuilder does nothing until .build() is called"]
122#[derive(Debug, Default)]
123pub struct ClientBuilder {
124 timeout: Option<Duration>,
125 settle: Option<Duration>,
126 user_agent: Option<String>,
127 visibility: Option<VisibilityPolicy>,
128}
129
130impl ClientBuilder {
131 pub fn timeout(mut self, timeout: Duration) -> Self {
133 self.timeout = Some(timeout);
134 self
135 }
136
137 pub fn settle(mut self, settle: Duration) -> Self {
139 self.settle = Some(settle);
140 self
141 }
142
143 pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
145 self.user_agent = Some(sanitize_user_agent(ua.into()));
146 self
147 }
148
149 pub fn visibility(mut self, policy: VisibilityPolicy) -> Self {
151 self.visibility = Some(policy);
152 self
153 }
154
155 #[must_use]
157 pub fn build(self) -> Client {
158 Client {
159 inner: Arc::new(self.build_inner()),
160 }
161 }
162
163 pub(crate) fn build_inner(self) -> ClientInner {
164 ClientInner {
165 timeout: self.timeout.unwrap_or(FetchOptions::DEFAULT_TIMEOUT),
166 settle: self.settle.unwrap_or_default(),
167 user_agent: self.user_agent,
168 visibility: self.visibility.unwrap_or_default(),
169 }
170 }
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// URL used by the tests that only inspect merged options.
    const EXAMPLE_URL: &str = "https://example.com";

    /// A default client carries `FetchOptions::DEFAULT_TIMEOUT`.
    #[test]
    fn client_default_uses_30s_timeout() {
        let client = Client::default();
        assert_eq!(client.inner.timeout, FetchOptions::DEFAULT_TIMEOUT);
    }

    /// The builder's timeout ends up in the built client.
    #[test]
    fn client_builder_sets_timeout() {
        let wanted = Duration::from_secs(60);
        let client = Client::builder().timeout(wanted).build();
        assert_eq!(client.inner.timeout, wanted);
    }

    /// The builder's settle duration ends up in the built client.
    #[test]
    fn client_builder_sets_settle() {
        let wanted = Duration::from_millis(500);
        let client = Client::builder().settle(wanted).build();
        assert_eq!(client.inner.settle, wanted);
    }

    /// A CR/LF sequence in the user agent is replaced before storage.
    #[test]
    fn client_builder_sanitizes_user_agent() {
        let client = Client::builder().user_agent("Bot\r\nX-Evil: yes").build();
        let stored = client.inner.user_agent.as_deref();
        assert_eq!(stored, Some("Bot X-Evil: yes"));
    }

    /// `options` hands the client's settings to a fresh `FetchOptions`.
    #[test]
    fn client_options_propagates_defaults() {
        let builder = Client::builder()
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(500))
            .user_agent("MyBot");
        let opts = builder.build().options(EXAMPLE_URL);

        assert_eq!(opts.timeout, Some(Duration::from_secs(60)));
        assert_eq!(opts.settle, Some(Duration::from_millis(500)));
        assert_eq!(opts.user_agent.as_deref(), Some("MyBot"));
    }

    /// Fields the caller set explicitly are not overwritten by the client.
    #[test]
    fn client_apply_defaults_caller_value_wins() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .user_agent("ClientBot")
            .build();
        let caller_opts = FetchOptions::new(EXAMPLE_URL)
            .timeout(Duration::from_secs(10))
            .user_agent("UserBot");

        let merged = client.apply_defaults(&caller_opts);

        assert_eq!(merged.timeout, Some(Duration::from_secs(10)));
        assert_eq!(merged.user_agent.as_deref(), Some("UserBot"));
    }

    /// Fields the caller left unset are filled from the client.
    #[test]
    fn client_apply_defaults_fills_unset_fields() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(750))
            .user_agent("ClientBot")
            .visibility(VisibilityPolicy::off())
            .build();
        let caller_opts = FetchOptions::new(EXAMPLE_URL);

        let merged = client.apply_defaults(&caller_opts);

        assert_eq!(merged.timeout, Some(Duration::from_secs(60)));
        assert_eq!(merged.settle, Some(Duration::from_millis(750)));
        assert_eq!(merged.user_agent.as_deref(), Some("ClientBot"));
        assert_eq!(merged.visibility, Some(VisibilityPolicy::off()));
    }

    /// Cloning a client shares the same `Arc` allocation.
    #[test]
    fn client_clone_shares_inner() {
        let original = Client::new();
        let duplicate = original.clone();
        assert!(Arc::ptr_eq(&original.inner, &duplicate.inner));
    }

    /// `full_page` is off unless requested.
    #[test]
    fn screenshot_options_default_is_viewport() {
        let opts = ScreenshotOptions::default();
        assert!(!opts.full_page);
    }

    /// Compile-time check that the public types are `Send + Sync`.
    #[test]
    fn assert_send_sync() {
        fn check<T>()
        where
            T: Send + Sync,
        {
        }
        check::<Client>();
        check::<ClientBuilder>();
        check::<ScreenshotOptions>();
    }

    /// The builder's visibility policy ends up in the built client.
    #[test]
    fn client_builder_sets_visibility() {
        let policy = VisibilityPolicy::off();
        let client = Client::builder().visibility(policy).build();
        assert_eq!(client.inner.visibility, VisibilityPolicy::off());
    }

    /// A string that is not a URL yields `Error::InvalidUrl`.
    #[tokio::test]
    async fn client_fetch_invalid_url_returns_invalid_url_error() {
        let client = Client::new();
        let outcome = client.fetch("not a url").await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }), "got: {err:?}");
    }

    /// A loopback address is refused with a network-class error.
    #[tokio::test]
    async fn client_fetch_private_address_is_rejected() {
        let client = Client::new();
        let outcome = client.fetch("http://127.0.0.1/").await;
        let err = outcome.unwrap_err();
        assert!(err.is_network(), "got: {err:?}");
    }
}