// spider_client/shapes/response.rs

1use bytes::Bytes;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
5#[derive(Debug, Deserialize, Serialize)]
6#[serde(untagged)]
7pub enum Content {
8    /// A raw string (e.g. plain text or HTML).
9    String(String),
10    /// Raw binary bytes.
11    Bytes(Bytes),
12    /// Structured object with optional formats.
13    Object {
14        raw: Option<String>,
15        bytes: Option<Bytes>,
16        text: Option<String>,
17        markdown: Option<String>,
18        html2text: Option<String>,
19        screenshot: Option<Bytes>,
20    },
21}
22
23impl Content {
24    /// Return the best-guess string representation of the content.
25    pub fn as_str(&self) -> Option<&str> {
26        match self {
27            Content::String(s) => Some(s),
28            Content::Object { text: Some(t), .. } => Some(t),
29            Content::Object { raw: Some(r), .. } => Some(r),
30            Content::Object {
31                html2text: Some(h), ..
32            } => Some(h),
33            Content::Object {
34                markdown: Some(m), ..
35            } => Some(m),
36            _ => None,
37        }
38    }
39
40    /// Return raw bytes if available.
41    pub fn as_bytes(&self) -> Option<&Bytes> {
42        match self {
43            Content::Bytes(b) => Some(b),
44            Content::Object { bytes: Some(b), .. } => Some(b),
45            Content::Object {
46                screenshot: Some(b),
47                ..
48            } => Some(b),
49            _ => None,
50        }
51    }
52
53    /// Return text content or a fallback string view of bytes if UTF-8.
54    pub fn as_utf8_lossy(&self) -> Option<String> {
55        match self {
56            Content::String(s) => Some(s.clone()),
57            Content::Object { text: Some(t), .. } => Some(t.clone()),
58            Content::Object { raw: Some(r), .. } => Some(r.clone()),
59            Content::Object {
60                markdown: Some(m), ..
61            } => Some(m.clone()),
62            Content::Object {
63                html2text: Some(h), ..
64            } => Some(h.clone()),
65            Content::Bytes(b) => Some(String::from_utf8_lossy(b).to_string()),
66            Content::Object { bytes: Some(b), .. } => Some(String::from_utf8_lossy(b).to_string()),
67            _ => None,
68        }
69    }
70
71    /// Return the full object if the content is structured.
72    pub fn as_object(&self) -> Option<&Self> {
73        match self {
74            Content::Object { .. } => Some(self),
75            _ => None,
76        }
77    }
78
79    /// Check if the content is a screenshot (binary).
80    pub fn has_screenshot(&self) -> bool {
81        matches!(
82            self,
83            Content::Object {
84                screenshot: Some(_),
85                ..
86            }
87        )
88    }
89
90    /// Check if the content is empty or contains only whitespace.
91    pub fn is_empty(&self) -> bool {
92        match self {
93            Content::String(s) => s.trim().is_empty(),
94            Content::Bytes(b) => b.is_empty(),
95            Content::Object {
96                raw,
97                text,
98                markdown,
99                html2text,
100                bytes,
101                screenshot,
102            } => {
103                raw.as_ref().map_or(true, |s| s.trim().is_empty())
104                    && text.as_ref().map_or(true, |s| s.trim().is_empty())
105                    && markdown.as_ref().map_or(true, |s| s.trim().is_empty())
106                    && html2text.as_ref().map_or(true, |s| s.trim().is_empty())
107                    && bytes.as_ref().map_or(true, |b| b.is_empty())
108                    && screenshot.as_ref().map_or(true, |b| b.is_empty())
109            }
110        }
111    }
112
113    /// Try to extract a plain `.html` or `.txt` suitable string.
114    pub fn extract_plaintext(&self) -> Option<String> {
115        self.as_str()
116            .map(|s| s.to_string())
117            .or_else(|| self.as_utf8_lossy())
118    }
119
120    /// Returns all the content keys available.
121    pub fn available_keys(&self) -> Vec<&'static str> {
122        match self {
123            Content::Object {
124                raw,
125                bytes,
126                text,
127                markdown,
128                html2text,
129                screenshot,
130            } => {
131                let mut keys = vec![];
132                if raw.is_some() {
133                    keys.push("raw");
134                }
135                if bytes.is_some() {
136                    keys.push("bytes");
137                }
138                if text.is_some() {
139                    keys.push("text");
140                }
141                if markdown.is_some() {
142                    keys.push("markdown");
143                }
144                if html2text.is_some() {
145                    keys.push("html2text");
146                }
147                if screenshot.is_some() {
148                    keys.push("screenshot");
149                }
150                keys
151            }
152            Content::String(_) => vec!["string"],
153            Content::Bytes(_) => vec!["bytes"],
154        }
155    }
156}
157
158#[derive(Debug, Deserialize, Serialize, Default)]
159pub struct ApiResponse {
160    /// Textual or binary content of the page.
161    pub content: Bytes,
162    /// Status code returned from the source.
163    pub status: u16,
164    /// Final URL requested.
165    pub url: String,
166    /// All links found on the page.
167    pub links: Option<Vec<String>>,
168    /// Optional request map with timing values.
169    pub request_map: Option<HashMap<String, f64>>,
170    /// Optional metadata associated with the page.
171    pub metadata: Option<Metadata>,
172    /// Optional request cost breakdown.
173    pub costs: Option<Costs>,
174    /// Optional error message.
175    pub error: Option<String>,
176}
177
178#[derive(Debug, Deserialize, Serialize, Default)]
179pub struct Costs {
180    /// The cost of the AI.
181    pub ai_cost: f64,
182    /// The cost of the bytes transferred.
183    pub bytes_transferred_cost: f64,
184    /// The cost of the compute.
185    pub compute_cost: f64,
186    /// The cost of the file.
187    pub file_cost: f64,
188    /// The total cost of the request.
189    pub total_cost: f64,
190    /// The cost of the transform.
191    pub transform_cost: f64,
192}
193
194#[derive(Debug, Deserialize, Serialize, Default)]
195pub struct Metadata {
196    /// SEO title of the page.
197    pub title: String,
198    /// Meta description of the page.
199    pub description: String,
200    /// Final resolved URL if available.
201    pub url: Option<String>,
202    /// Social Open Graph preview image.
203    #[serde(rename = "og_image")]
204    pub image: Option<String>,
205    /// Optional keywords extracted from content.
206    pub keywords: Option<Vec<String>>,
207    /// Optional raw YouTube transcript string.
208    pub yt_transcript: Option<String>,
209    /// Domain of the source page.
210    pub domain: Option<String>,
211    /// Additional fallback fields.
212    pub pathname: Option<String>,
213    pub original_url: Option<String>,
214    pub user_id: Option<String>,
215    /// File-type classification if detected.
216    pub resource_type: Option<String>,
217    /// File size in bytes if known.
218    pub file_size: Option<u64>,
219    /// Any structured extraction result (generic).
220    pub extracted_data: Option<serde_json::Value>,
221    /// automation metadata:
222    pub automation_data: Option<serde_json::Value>
223}
224
225#[derive(Debug, Deserialize, Serialize, Default)]
226pub struct SearchList {
227    /// The main content list.
228    pub content: Vec<SearchEntry>,
229}
230
231#[derive(Debug, Deserialize, Serialize, Default)]
232pub struct SearchEntry {
233    #[serde(default)]
234    /// The search description.
235    pub description: Option<String>,
236    #[serde(default)]
237    /// The search title.
238    pub title: Option<String>,
239    #[serde(default)]
240    /// The search url.
241    pub url: String,
242}