1use std::sync::Arc;
4use std::time::Duration;
5
6use crate::error::{Error, Result};
7use crate::fetch::{self, FetchOptions, Page};
8use crate::net::sanitize_user_agent;
9use crate::visibility::VisibilityPolicy;
10
11#[derive(Debug, Clone)]
13pub struct Client {
14 inner: Arc<ClientInner>,
15}
16
17#[derive(Debug)]
18pub(crate) struct ClientInner {
19 pub(crate) timeout: Duration,
20 pub(crate) settle: Duration,
21 pub(crate) user_agent: Option<String>,
22 pub(crate) visibility: VisibilityPolicy,
23}
24
25impl ClientInner {
26 pub(crate) fn apply_defaults(&self, mut opts: FetchOptions) -> FetchOptions {
28 opts.timeout.get_or_insert(self.timeout);
29 opts.settle.get_or_insert(self.settle);
30 opts.visibility.get_or_insert(self.visibility);
31 if let Some(ua) = &self.user_agent {
32 opts.user_agent.get_or_insert_with(|| ua.clone());
33 }
34 opts
35 }
36
37 pub(crate) fn options(&self, url: &str) -> FetchOptions {
38 self.apply_defaults(FetchOptions::new(url))
39 }
40}
41
42impl Default for Client {
43 fn default() -> Self {
44 Self::new()
45 }
46}
47
48impl Client {
49 #[must_use]
51 pub fn new() -> Self {
52 Self::builder().build()
53 }
54
55 pub fn builder() -> ClientBuilder {
57 ClientBuilder::default()
58 }
59
60 pub async fn fetch(&self, url: &str) -> Result<Page> {
62 fetch::fetch(&self.inner.options(url)).await
63 }
64
65 pub async fn fetch_with(&self, opts: &FetchOptions) -> Result<Page> {
67 fetch::fetch(&self.inner.apply_defaults(opts.clone())).await
68 }
69
70 pub async fn markdown(&self, url: &str) -> Result<String> {
72 self.fetch(url).await?.markdown_with_url(url)
73 }
74
75 pub async fn text(&self, url: &str) -> Result<String> {
77 Ok(self.fetch(url).await?.inner_text)
78 }
79
80 pub async fn extract_json(&self, url: &str) -> Result<String> {
82 self.fetch(url).await?.extract_json_with_url(url)
83 }
84
85 pub async fn screenshot(&self, url: &str, opts: &ScreenshotOptions) -> Result<Vec<u8>> {
87 let fopts = self.inner.apply_defaults(FetchOptions::screenshot(url, opts.full_page));
88 let page = fetch::fetch(&fopts).await?;
89 page.screenshot_png()
90 .map(<[u8]>::to_vec)
91 .ok_or_else(|| Error::screenshot(anyhow::anyhow!("screenshot returned no data"), Some(url.to_string())))
92 }
93
94 pub async fn execute_js(&self, url: &str, expression: impl Into<String>) -> Result<String> {
96 let fopts = self.inner.apply_defaults(FetchOptions::javascript(url, expression));
97 let page = fetch::fetch(&fopts).await?;
98 page.js_result
99 .ok_or_else(|| Error::javascript(anyhow::anyhow!("execute_js returned no result"), Some(url.to_string())))
100 }
101}
102
/// Options accepted by `Client::screenshot`.
///
/// The default value has `full_page` switched off.
#[non_exhaustive]
#[derive(Clone, Debug, Default)]
pub struct ScreenshotOptions {
    /// Forwarded as the second argument of `FetchOptions::screenshot`.
    /// Presumably `true` captures the whole page instead of just the
    /// viewport — confirm against the fetch layer.
    pub full_page: bool,
}
110
111#[must_use = "ClientBuilder does nothing until .build() is called"]
113#[derive(Debug, Default)]
114pub struct ClientBuilder {
115 timeout: Option<Duration>,
116 settle: Option<Duration>,
117 user_agent: Option<String>,
118 visibility: Option<VisibilityPolicy>,
119}
120
121impl ClientBuilder {
122 pub fn timeout(mut self, timeout: Duration) -> Self {
124 self.timeout = Some(timeout);
125 self
126 }
127
128 pub fn settle(mut self, settle: Duration) -> Self {
130 self.settle = Some(settle);
131 self
132 }
133
134 pub fn user_agent(mut self, ua: impl Into<String>) -> Self {
136 self.user_agent = Some(sanitize_user_agent(ua.into()));
137 self
138 }
139
140 pub fn visibility(mut self, policy: VisibilityPolicy) -> Self {
142 self.visibility = Some(policy);
143 self
144 }
145
146 #[must_use]
148 pub fn build(self) -> Client {
149 Client {
150 inner: Arc::new(self.build_inner()),
151 }
152 }
153
154 pub(crate) fn build_inner(self) -> ClientInner {
155 ClientInner {
156 timeout: self.timeout.unwrap_or(FetchOptions::DEFAULT_TIMEOUT),
157 settle: self.settle.unwrap_or_default(),
158 user_agent: self.user_agent,
159 visibility: self.visibility.unwrap_or_default(),
160 }
161 }
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// URL used by the tests that only inspect merged options.
    const EXAMPLE_URL: &str = "https://example.com";

    // NOTE(review): the name says 30s, but the value of `DEFAULT_TIMEOUT` is
    // not visible from this file — confirm they still agree.
    #[test]
    fn client_default_uses_30s_timeout() {
        let client = Client::default();
        assert_eq!(client.inner.timeout, FetchOptions::DEFAULT_TIMEOUT);
    }

    #[test]
    fn client_builder_sets_timeout() {
        let expected = Duration::from_secs(60);
        let client = Client::builder().timeout(expected).build();
        assert_eq!(client.inner.timeout, expected);
    }

    #[test]
    fn client_builder_sets_settle() {
        let expected = Duration::from_millis(500);
        let client = Client::builder().settle(expected).build();
        assert_eq!(client.inner.settle, expected);
    }

    /// The builder must store the sanitized user agent, not the raw input.
    #[test]
    fn client_builder_sanitizes_user_agent() {
        let client = Client::builder().user_agent("Bot\r\nX-Evil: yes").build();
        let stored = client.inner.user_agent.as_deref();
        assert_eq!(stored, Some("Bot X-Evil: yes"));
    }

    #[test]
    fn client_options_propagates_defaults() {
        let builder = Client::builder()
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(500))
            .user_agent("MyBot");
        let client = builder.build();

        let opts = client.inner.options(EXAMPLE_URL);

        assert_eq!(opts.timeout, Some(Duration::from_secs(60)));
        assert_eq!(opts.settle, Some(Duration::from_millis(500)));
        assert_eq!(opts.user_agent.as_deref(), Some("MyBot"));
    }

    /// Values set on the request take precedence over the client defaults.
    #[test]
    fn client_apply_defaults_caller_value_wins() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .user_agent("ClientBot")
            .build();
        let request = FetchOptions::new(EXAMPLE_URL)
            .timeout(Duration::from_secs(10))
            .user_agent("UserBot");

        let merged = client.inner.apply_defaults(request);

        assert_eq!(merged.timeout, Some(Duration::from_secs(10)));
        assert_eq!(merged.user_agent.as_deref(), Some("UserBot"));
    }

    /// Fields the request leaves unset are taken from the client.
    #[test]
    fn client_apply_defaults_fills_unset_fields() {
        let client = Client::builder()
            .timeout(Duration::from_secs(60))
            .settle(Duration::from_millis(750))
            .user_agent("ClientBot")
            .visibility(VisibilityPolicy::off())
            .build();
        let request = FetchOptions::new(EXAMPLE_URL);

        let merged = client.inner.apply_defaults(request);

        assert_eq!(merged.timeout, Some(Duration::from_secs(60)));
        assert_eq!(merged.settle, Some(Duration::from_millis(750)));
        assert_eq!(merged.user_agent.as_deref(), Some("ClientBot"));
        assert_eq!(merged.visibility, Some(VisibilityPolicy::off()));
    }

    #[test]
    fn client_clone_shares_inner() {
        let original = Client::new();
        let copy = original.clone();
        assert!(Arc::ptr_eq(&original.inner, &copy.inner));
    }

    #[test]
    fn screenshot_options_default_is_viewport() {
        let opts = ScreenshotOptions::default();
        assert!(!opts.full_page);
    }

    /// Compile-time check: the public types can cross thread boundaries.
    #[test]
    fn assert_send_sync() {
        fn require<T>()
        where
            T: Send + Sync,
        {
        }
        require::<Client>();
        require::<ClientBuilder>();
        require::<ScreenshotOptions>();
    }

    #[test]
    fn client_builder_sets_visibility() {
        let policy = VisibilityPolicy::off();
        let client = Client::builder().visibility(policy).build();
        assert_eq!(client.inner.visibility, VisibilityPolicy::off());
    }

    #[tokio::test]
    async fn client_fetch_invalid_url_returns_invalid_url_error() {
        let outcome = Client::new().fetch("not a url").await;
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::InvalidUrl { .. }), "got: {err:?}");
    }

    #[tokio::test]
    async fn client_fetch_private_address_is_rejected() {
        let outcome = Client::new().fetch("http://127.0.0.1/").await;
        let err = outcome.unwrap_err();
        assert!(err.is_network(), "got: {err:?}");
    }
}