firecrawl_sdk/
document.rs

1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3
4#[cfg(feature = "mcp-tool")]
5use schemars::JsonSchema;
6
7#[serde_with::skip_serializing_none]
8#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
9#[cfg_attr(feature = "mcp-tool", derive(JsonSchema))]
10#[serde(rename_all = "camelCase")]
11pub struct DocumentMetadata {
12    // Required fields from the API
13    #[serde(rename = "sourceURL")]
14    pub source_url: String,
15    pub status_code: u16,
16    pub error: Option<String>,
17
18    // Common metadata fields - all are optional and can be either strings or arrays
19    #[serde(default)]
20    #[serde(deserialize_with = "deserialize_string_or_vec")]
21    pub title: Option<String>,
22
23    #[serde(default)]
24    #[serde(deserialize_with = "deserialize_string_or_vec")]
25    pub description: Option<String>,
26
27    #[serde(default)]
28    #[serde(deserialize_with = "deserialize_string_or_vec")]
29    pub language: Option<String>,
30
31    // All other metadata fields are captured here
32    #[serde(flatten)]
33    pub additional_fields: std::collections::HashMap<String, Value>,
34}
35
36// Helper function to deserialize a field that could be either a string or an array of strings
37fn deserialize_string_or_vec<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
38where
39    D: serde::Deserializer<'de>,
40{
41    struct StringOrVec;
42
43    impl<'de> serde::de::Visitor<'de> for StringOrVec {
44        type Value = Option<String>;
45
46        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
47            formatter.write_str("string or array of strings")
48        }
49
50        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
51        where
52            E: serde::de::Error,
53        {
54            Ok(Some(value.to_string()))
55        }
56
57        fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
58        where
59            E: serde::de::Error,
60        {
61            Ok(Some(value))
62        }
63
64        fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
65        where
66            A: serde::de::SeqAccess<'de>,
67        {
68            // Take the first element if it's an array
69            if let Some(first) = seq.next_element::<String>()? {
70                return Ok(Some(first));
71            }
72            Ok(None)
73        }
74
75        fn visit_none<E>(self) -> Result<Self::Value, E>
76        where
77            E: serde::de::Error,
78        {
79            Ok(None)
80        }
81
82        fn visit_unit<E>(self) -> Result<Self::Value, E>
83        where
84            E: serde::de::Error,
85        {
86            Ok(None)
87        }
88    }
89
90    deserializer.deserialize_any(StringOrVec)
91}
92
93/// Represents a scrape result from an action
94#[serde_with::skip_serializing_none]
95#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
96#[serde(rename_all = "camelCase")]
97pub struct ScrapeActionResult {
98    /// The URL that was scraped
99    pub url: String,
100    /// The HTML content of the scraped URL
101    pub html: String,
102}
103
104/// Represents a JavaScript return value from an action
105#[serde_with::skip_serializing_none]
106#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
107#[serde(rename_all = "camelCase")]
108pub struct JavaScriptReturnValue {
109    /// The type of the returned value
110    #[serde(rename = "type")]
111    pub value_type: String,
112    /// The actual value returned
113    pub value: Value,
114}
115
116/// Represents the results of actions performed during scraping
117#[serde_with::skip_serializing_none]
118#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
119#[serde(rename_all = "camelCase")]
120pub struct ActionResults {
121    /// URLs to screenshots taken during actions
122    pub screenshots: Option<Vec<String>>,
123    /// Results of scrape actions
124    pub scrapes: Option<Vec<ScrapeActionResult>>,
125    /// Results of JavaScript execution actions
126    pub javascript_returns: Option<Vec<JavaScriptReturnValue>>,
127}
128
129#[serde_with::skip_serializing_none]
130#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
131#[serde(rename_all = "camelCase")]
132pub struct Document {
133    /// A list of the links on the page, present if `ScrapeFormats::Markdown` is present in `ScrapeOptions.formats`. (default)
134    pub markdown: Option<String>,
135
136    /// The HTML of the page, present if `ScrapeFormats::HTML` is present in `ScrapeOptions.formats`.
137    ///
138    /// This contains HTML that has non-content tags removed. If you need the original HTML, use `ScrapeFormats::RawHTML`.
139    pub html: Option<String>,
140
141    /// The raw HTML of the page, present if `ScrapeFormats::RawHTML` is present in `ScrapeOptions.formats`.
142    ///
143    /// This contains the original, untouched HTML on the page. If you only need human-readable content, use `ScrapeFormats::HTML`.
144    pub raw_html: Option<String>,
145
146    /// The URL to the screenshot of the page, present if `ScrapeFormats::Screenshot` or `ScrapeFormats::ScreenshotFullPage` is present in `ScrapeOptions.formats`.
147    pub screenshot: Option<String>,
148
149    /// A list of the links on the page, present if `ScrapeFormats::Links` is present in `ScrapeOptions.formats`.
150    pub links: Option<Vec<String>>,
151
152    /// The extracted data from the page, present if `ScrapeFormats::Extract` is present in `ScrapeOptions.formats`.
153    /// If `ScrapeOptions.extract.schema` is `Some`, this `Value` is guaranteed to match the provided schema.
154    #[serde(alias = "llm_extraction")]
155    pub extract: Option<Value>,
156
157    /// The structured JSON data from the page, present if `ScrapeFormats::JSON` is present in `ScrapeOptions.formats`.
158    /// If `ScrapeOptions.jsonOptions.schema` is `Some`, this `Value` is guaranteed to match the provided schema.
159    pub json: Option<Value>,
160
161    /// Results of actions performed during scraping, present if `actions` parameter was provided in the request.
162    pub actions: Option<ActionResults>,
163
164    /// The metadata from the page.
165    pub metadata: DocumentMetadata,
166
167    /// Can be present if `ScrapeFormats::Extract` is present in `ScrapeOptions.formats`.
168    /// The warning message will contain any errors encountered during the extraction.
169    pub warning: Option<String>,
170}