firecrawl_sdk/
document.rs

1use serde::{Deserialize, Serialize};
2use serde_json::Value;
3
4#[cfg(feature = "mcp_tool")]
5use schemars::JsonSchema;
6
7#[serde_with::skip_serializing_none]
8#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
9#[cfg_attr(feature = "mcp_tool", derive(JsonSchema))]
10#[serde(rename_all = "camelCase")]
11pub struct DocumentMetadata {
12    // firecrawl specific
13    #[serde(rename = "sourceURL")]
14    pub source_url: String,
15    pub status_code: u16,
16    pub error: Option<String>,
17
18    // basic meta tags
19    pub title: Option<String>,
20    pub description: Option<String>,
21    pub language: Option<String>,
22    pub keywords: Option<String>,
23    pub robots: Option<String>,
24
25    // og: namespace
26    pub og_title: Option<String>,
27    pub og_description: Option<String>,
28    pub og_url: Option<String>,
29    pub og_image: Option<String>,
30    pub og_audio: Option<String>,
31    pub og_determiner: Option<String>,
32    pub og_locale: Option<String>,
33    pub og_locale_alternate: Option<Vec<String>>,
34    pub og_site_name: Option<String>,
35    pub og_video: Option<String>,
36
37    // article: namespace
38    pub article_section: Option<String>,
39    pub article_tag: Option<String>,
40    pub published_time: Option<String>,
41    pub modified_time: Option<String>,
42
43    // dc./dcterms. namespace
44    pub dcterms_keywords: Option<String>,
45    pub dc_description: Option<String>,
46    pub dc_subject: Option<String>,
47    pub dcterms_subject: Option<String>,
48    pub dcterms_audience: Option<String>,
49    pub dc_type: Option<String>,
50    pub dcterms_type: Option<String>,
51    pub dc_date: Option<String>,
52    pub dc_date_created: Option<String>,
53    pub dcterms_created: Option<String>,
54}
55
56#[serde_with::skip_serializing_none]
57#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
58#[cfg_attr(feature = "mcp_tool", derive(JsonSchema))]
59#[serde(rename_all = "camelCase")]
60pub struct Document {
61    /// A list of the links on the page, present if `ScrapeFormats::Markdown` is present in `ScrapeOptions.formats`. (default)
62    pub markdown: Option<String>,
63
64    /// The HTML of the page, present if `ScrapeFormats::HTML` is present in `ScrapeOptions.formats`.
65    ///
66    /// This contains HTML that has non-content tags removed. If you need the original HTML, use `ScrapeFormats::RawHTML`.
67    pub html: Option<String>,
68
69    /// The raw HTML of the page, present if `ScrapeFormats::RawHTML` is present in `ScrapeOptions.formats`.
70    ///
71    /// This contains the original, untouched HTML on the page. If you only need human-readable content, use `ScrapeFormats::HTML`.
72    pub raw_html: Option<String>,
73
74    /// The URL to the screenshot of the page, present if `ScrapeFormats::Screenshot` or `ScrapeFormats::ScreenshotFullPage` is present in `ScrapeOptions.formats`.
75    pub screenshot: Option<String>,
76
77    /// A list of the links on the page, present if `ScrapeFormats::Links` is present in `ScrapeOptions.formats`.
78    pub links: Option<Vec<String>>,
79
80    /// The extracted data from the page, present if `ScrapeFormats::Extract` is present in `ScrapeOptions.formats`.
81    /// If `ScrapeOptions.extract.schema` is `Some`, this `Value` is guaranteed to match the provided schema.
82    pub extract: Option<Value>,
83
84    /// The structured JSON data from the page, present if `ScrapeFormats::JSON` is present in `ScrapeOptions.formats`.
85    /// If `ScrapeOptions.jsonOptions.schema` is `Some`, this `Value` is guaranteed to match the provided schema.
86    pub json: Option<Value>,
87
88    /// The metadata from the page.
89    pub metadata: DocumentMetadata,
90
91    /// Can be present if `ScrapeFormats::Extract` is present in `ScrapeOptions.formats`.
92    /// The warning message will contain any errors encountered during the extraction.
93    pub warning: Option<String>,
94}