Skip to main content

browsr_types/
client_api.rs

1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3use serde_json::Value;
4
5#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq)]
6#[serde(rename_all = "camelCase")]
7pub enum ScrapeFormat {
8    Markdown,
9    Html,
10    Screenshot,
11    Structured,
12    Agent,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
16#[serde(rename_all = "camelCase")]
17pub struct JsonExtractionOptions {
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub prompt: Option<String>,
20    #[serde(default, skip_serializing_if = "Option::is_none")]
21    pub schema: Option<Value>,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
25#[serde(rename_all = "camelCase")]
26pub struct ScrapeAction {
27    #[serde(rename = "type")]
28    pub action_type: String,
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub selector: Option<String>,
31    #[serde(default, skip_serializing_if = "Option::is_none")]
32    pub text: Option<String>,
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub milliseconds: Option<u64>,
35    #[serde(default, skip_serializing_if = "Option::is_none")]
36    pub expression: Option<String>,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
40#[serde(rename_all = "camelCase")]
41pub struct ScrapeApiRequest {
42    pub url: String,
43    #[serde(default = "default_scrape_formats")]
44    pub formats: Vec<ScrapeFormat>,
45    #[serde(default, skip_serializing_if = "Option::is_none")]
46    pub wait_for: Option<u64>,
47    #[serde(default, skip_serializing_if = "Option::is_none")]
48    pub actions: Option<Vec<ScrapeAction>>,
49    #[serde(default, skip_serializing_if = "Option::is_none")]
50    pub json_options: Option<JsonExtractionOptions>,
51    #[serde(default = "default_true")]
52    pub only_main_content: bool,
53    #[serde(default = "default_true")]
54    pub remove_base64_images: bool,
55}
56
57fn default_scrape_formats() -> Vec<ScrapeFormat> {
58    vec![ScrapeFormat::Markdown]
59}
60
/// Serde default for boolean fields that are switched on unless the payload
/// says otherwise. Always returns `true`.
fn default_true() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
64
65impl ScrapeApiRequest {
66    pub fn new(url: impl Into<String>) -> Self {
67        Self {
68            url: url.into(),
69            formats: vec![ScrapeFormat::Markdown],
70            wait_for: None,
71            actions: None,
72            json_options: None,
73            only_main_content: true,
74            remove_base64_images: true,
75        }
76    }
77
78    pub fn with_formats(mut self, formats: Vec<ScrapeFormat>) -> Self {
79        self.formats = formats;
80        self
81    }
82
83    pub fn with_wait(mut self, ms: u64) -> Self {
84        self.wait_for = Some(ms);
85        self
86    }
87}
88
89#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
90#[serde(rename_all = "camelCase")]
91pub struct PageMetadata {
92    #[serde(default)]
93    pub title: Option<String>,
94    #[serde(default)]
95    pub description: Option<String>,
96    #[serde(default, rename = "sourceURL")]
97    pub source_url: String,
98    #[serde(default)]
99    pub status_code: Option<u16>,
100}
101
102/// HTML output from /v1/scrape.
103#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
104#[serde(rename_all = "camelCase")]
105pub struct HtmlResult {
106    #[serde(default, skip_serializing_if = "Option::is_none")]
107    pub full: Option<String>,
108    #[serde(default, skip_serializing_if = "Vec::is_empty")]
109    pub selectors: Vec<HtmlSelectorResult>,
110}
111
112#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
113#[serde(rename_all = "camelCase")]
114pub struct HtmlSelectorResult {
115    pub selector: String,
116    pub html: String,
117}
118
119/// Scraped data matching browsr-cloud /v1/scrape response.
120#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
121#[serde(rename_all = "camelCase")]
122pub struct ScrapeData {
123    #[serde(default, skip_serializing_if = "Option::is_none")]
124    pub markdown: Option<String>,
125    #[serde(default, skip_serializing_if = "Option::is_none")]
126    pub html: Option<HtmlResult>,
127    #[serde(default, skip_serializing_if = "Option::is_none")]
128    pub screenshot: Option<String>,
129    #[serde(default, skip_serializing_if = "Option::is_none")]
130    pub structured: Option<Value>,
131    #[serde(default, skip_serializing_if = "Option::is_none")]
132    pub agent: Option<Value>,
133    #[serde(default)]
134    pub metadata: PageMetadata,
135    #[serde(default, skip_serializing_if = "Option::is_none")]
136    pub warning: Option<String>,
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
140pub struct ScrapeApiResponse {
141    pub success: bool,
142    pub data: ScrapeData,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
146#[serde(rename_all = "camelCase")]
147pub struct CrawlApiRequest {
148    pub url: String,
149    #[serde(default = "default_crawl_limit")]
150    pub limit: usize,
151    #[serde(default = "default_crawl_depth")]
152    pub max_depth: usize,
153    #[serde(default = "default_scrape_formats")]
154    pub formats: Vec<ScrapeFormat>,
155    #[serde(default, skip_serializing_if = "Option::is_none")]
156    pub wait_for: Option<u64>,
157    #[serde(default, skip_serializing_if = "Option::is_none")]
158    pub include_paths: Option<Vec<String>>,
159    #[serde(default, skip_serializing_if = "Option::is_none")]
160    pub exclude_paths: Option<Vec<String>>,
161    #[serde(default = "default_true")]
162    pub only_main_content: bool,
163    #[serde(default, skip_serializing_if = "Option::is_none")]
164    pub json_options: Option<JsonExtractionOptions>,
165}
166
/// Serde default for `CrawlApiRequest::limit`. Always returns 10.
fn default_crawl_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
170
/// Serde default for `CrawlApiRequest::max_depth`. Always returns 2.
fn default_crawl_depth() -> usize {
    const DEFAULT_DEPTH: usize = 2;
    DEFAULT_DEPTH
}
174
175impl CrawlApiRequest {
176    pub fn new(url: impl Into<String>) -> Self {
177        Self {
178            url: url.into(),
179            limit: 10,
180            max_depth: 2,
181            formats: vec![ScrapeFormat::Markdown],
182            wait_for: None,
183            include_paths: None,
184            exclude_paths: None,
185            only_main_content: true,
186            json_options: None,
187        }
188    }
189}
190
191#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
192pub struct CrawlApiResponse {
193    pub success: bool,
194    pub total: usize,
195    pub completed: usize,
196    pub data: Vec<ScrapeData>,
197}
198
199#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
200pub struct SessionCreated {
201    pub session_id: String,
202    #[serde(default)]
203    pub sse_url: Option<String>,
204    #[serde(default)]
205    pub frame_url: Option<String>,
206    #[serde(default)]
207    pub frame_token: Option<String>,
208}
209
210impl SessionCreated {
211    pub fn build_sse_url(&self, base_url: &str, width: Option<u32>, height: Option<u32>) -> String {
212        let mut url = self
213            .sse_url
214            .clone()
215            .unwrap_or_else(|| format!("{}/stream/sse?session_id={}", base_url, self.session_id));
216
217        if let Some(ref token) = self.frame_token {
218            let sep = if url.contains('?') { "&" } else { "?" };
219            url = format!("{}{}token={}", url, sep, token);
220        }
221        if let Some(w) = width {
222            let sep = if url.contains('?') { "&" } else { "?" };
223            url = format!("{}{}width={}", url, sep, w);
224        }
225        if let Some(h) = height {
226            url = format!("{}&height={}", url, h);
227        }
228        url
229    }
230}
231
232#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
233pub struct ObserveOptions {
234    pub use_image: Option<bool>,
235    pub full_page: Option<bool>,
236    pub wait_ms: Option<u64>,
237    pub include_content: Option<bool>,
238}
239
240impl Default for ObserveOptions {
241    fn default() -> Self {
242        Self {
243            use_image: Some(true),
244            full_page: None,
245            wait_ms: None,
246            include_content: Some(true),
247        }
248    }
249}
250
251#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
252pub struct RelayEvent {
253    pub ts: i64,
254    pub session_id: String,
255    pub category: String,
256    pub method: Option<String>,
257    pub level: Option<String>,
258    pub summary: Option<String>,
259    pub payload: Value,
260}
261
262#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
263pub struct RelayEventsResponse {
264    pub session_id: String,
265    pub count: usize,
266    pub events: Vec<RelayEvent>,
267}
268
269#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
270pub struct RelaySessionInfo {
271    pub session_id: String,
272    pub connected: bool,
273    pub connected_at: i64,
274    #[serde(default)]
275    pub last_activity: Option<i64>,
276    #[serde(default)]
277    pub idle_secs: Option<i64>,
278    #[serde(default)]
279    pub user_email: Option<String>,
280    #[serde(default)]
281    pub tab_url: Option<String>,
282    #[serde(default)]
283    pub tab_title: Option<String>,
284}
285
286#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
287pub struct RelaySessionListResponse {
288    pub sessions: Vec<RelaySessionInfo>,
289}
290
291#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)]
292#[serde(rename_all = "camelCase")]
293pub struct ShellCreateSessionRequest {
294    #[serde(default, skip_serializing_if = "Option::is_none")]
295    pub image: Option<String>,
296    #[serde(default, skip_serializing_if = "Option::is_none")]
297    pub language: Option<String>,
298    #[serde(default, skip_serializing_if = "Option::is_none")]
299    pub timeout_secs: Option<u32>,
300    #[serde(default, skip_serializing_if = "Option::is_none")]
301    pub working_dir: Option<String>,
302}
303
304#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
305#[serde(rename_all = "camelCase")]
306pub struct ShellCreateSessionResponse {
307    pub session_id: String,
308    pub status: String,
309    #[serde(default)]
310    pub worker_id: Option<String>,
311    #[serde(default)]
312    pub language: Option<String>,
313}
314
315#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
316#[serde(rename_all = "camelCase")]
317pub struct ShellSessionListItem {
318    pub session_id: String,
319    pub status: String,
320    pub image: String,
321    pub created_at: String,
322    pub last_activity: String,
323}
324
325#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
326pub struct ShellSessionListResponse {
327    pub sessions: Vec<ShellSessionListItem>,
328}
329
330#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
331pub struct ShellTerminateResponse {
332    pub session_id: String,
333    pub status: String,
334}
335
336#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
337pub struct ShellExecRequest {
338    pub session_id: String,
339    pub command: String,
340    #[serde(default, skip_serializing_if = "Option::is_none")]
341    pub timeout_secs: Option<u32>,
342    #[serde(default, skip_serializing_if = "Option::is_none")]
343    pub working_dir: Option<String>,
344}
345
346#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
347pub struct ShellExecResult {
348    #[serde(default)]
349    pub stdout: String,
350    #[serde(default)]
351    pub stderr: String,
352    #[serde(default)]
353    pub exit_code: Option<i32>,
354    #[serde(default)]
355    pub duration_ms: Option<u64>,
356    #[serde(default)]
357    pub timed_out: bool,
358}
359
360#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
361pub struct ShellExecResponse {
362    pub session_id: String,
363    #[serde(flatten)]
364    pub result: ShellExecResult,
365}