firecrawl_sdk/
map.rs

1use serde::{Deserialize, Serialize};
2
3#[cfg(feature = "mcp_tool")]
4use schemars::JsonSchema;
5
6use crate::{FirecrawlApp, FirecrawlError, API_VERSION};
7
8#[serde_with::skip_serializing_none]
9#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
10#[cfg_attr(feature = "mcp_tool", derive(JsonSchema))]
11#[serde(rename_all = "camelCase")]
12pub struct MapOptions {
13    /// Optional search term to filter URLs
14    pub search: Option<String>,
15
16    /// Skip sitemap.xml discovery and only use HTML links
17    pub ignore_sitemap: Option<bool>,
18
19    /// Only use sitemap.xml for discovery, ignore HTML links
20    pub sitemap_only: Option<bool>,
21
22    /// Include URLs from subdomains in results
23    pub include_subdomains: Option<bool>,
24
25    /// Maximum number of URLs to return
26    pub limit: Option<u32>,
27}
28
29#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
30#[serde(rename_all = "camelCase")]
31struct MapRequestBody {
32    url: String,
33
34    #[serde(flatten)]
35    options: MapOptions,
36}
37
38#[derive(Deserialize, Serialize, Debug, Default, Clone, PartialEq, Eq)]
39#[serde(rename_all = "camelCase")]
40struct MapResponse {
41    success: bool,
42
43    links: Vec<String>,
44}
45
46impl FirecrawlApp {
47    /// Returns links from a URL using the Firecrawl API.
48    pub async fn map_url(
49        &self,
50        url: impl AsRef<str>,
51        options: impl Into<Option<MapOptions>>,
52    ) -> Result<Vec<String>, FirecrawlError> {
53        let body = MapRequestBody {
54            url: url.as_ref().to_string(),
55            options: options.into().unwrap_or_default(),
56        };
57
58        let headers = self.prepare_headers(None);
59
60        let response = self
61            .client
62            .post(format!("{}/{}/map", self.api_url, API_VERSION))
63            .headers(headers)
64            .json(&body)
65            .send()
66            .await
67            .map_err(|e| FirecrawlError::HttpError(format!("Mapping {:?}", url.as_ref()), e))?;
68
69        let response = self
70            .handle_response::<MapResponse>(response, "scrape URL")
71            .await?;
72
73        Ok(response.links)
74    }
75}
76
#[cfg(all(test, feature = "mcp_tool"))]
mod schema_tests {
    use super::*;

    /// Checks the JSON schema generated for [`MapOptions`]: it must be an
    /// object exposing every option under its camelCase name, with the
    /// expected JSON type and the field's doc comment as its description.
    #[test]
    fn test_map_options_schema() {
        let schema = async_claude::tool::parse_input_schema::<MapOptions>().unwrap();

        // Check basic structure
        assert_eq!(schema["type"], "object");

        let properties = &schema["properties"];
        assert!(properties.is_object());

        // (property name, expected description)
        let expected = [
            ("search", "Optional search term to filter URLs"),
            (
                "ignoreSitemap",
                "Skip sitemap.xml discovery and only use HTML links",
            ),
            (
                "sitemapOnly",
                "Only use sitemap.xml for discovery, ignore HTML links",
            ),
            ("includeSubdomains", "Include URLs from subdomains in results"),
            ("limit", "Maximum number of URLs to return"),
        ];

        // Check all expected properties exist
        for (name, _) in expected {
            assert!(properties.get(name).is_some(), "Property {} not found", name);
        }

        // Check property types
        assert_eq!(properties["search"]["type"], "string");
        for name in ["ignoreSitemap", "sitemapOnly", "includeSubdomains"] {
            assert_eq!(
                properties[name]["type"], "boolean",
                "Property {} should be boolean",
                name
            );
        }
        let limit_type = &properties["limit"]["type"];
        assert!(
            *limit_type == "integer" || *limit_type == "number",
            "Property limit should be numeric"
        );

        // Check descriptions
        for (name, description) in expected {
            assert_eq!(
                properties[name]["description"], description,
                "Unexpected description for property {}",
                name
            );
        }
    }
}
143
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Fully populated options matching the JSON fixtures used below.
    fn populated_options() -> MapOptions {
        MapOptions {
            search: Some("keyword".to_string()),
            ignore_sitemap: Some(true),
            sitemap_only: Some(false),
            include_subdomains: Some(true),
            limit: Some(100),
        }
    }

    /// camelCase JSON keys deserialize into the matching `MapOptions` fields.
    #[test]
    fn test_map_options_deserialization() {
        let fixture = json!({
            "search": "keyword",
            "ignoreSitemap": true,
            "sitemapOnly": false,
            "includeSubdomains": true,
            "limit": 100
        });

        let parsed = serde_json::from_value::<MapOptions>(fixture)
            .expect("Failed to deserialize MapOptions");

        assert_eq!(parsed, populated_options());
    }

    /// Options that sit next to `url` in the JSON object are gathered into
    /// the flattened `options` field of the request body.
    #[test]
    fn test_map_request_deserialization() {
        let fixture = json!({
            "url": "https://example.com",
            "search": "keyword",
            "ignoreSitemap": true,
            "sitemapOnly": false,
            "includeSubdomains": true,
            "limit": 100
        });

        let parsed = serde_json::from_value::<MapRequestBody>(fixture)
            .expect("Failed to deserialize MapRequestBody");

        let wanted = MapRequestBody {
            url: "https://example.com".to_string(),
            options: populated_options(),
        };
        assert_eq!(parsed, wanted);
    }

    /// A response carrying `success` and `links` deserializes with the links
    /// in their original order.
    #[test]
    fn test_map_response_deserialization() {
        let fixture = json!({
            "success": true,
            "links": [
                "https://example.com/page1",
                "https://example.com/page2",
                "https://example.com/page3"
            ]
        });

        let parsed = serde_json::from_value::<MapResponse>(fixture)
            .expect("Failed to deserialize MapResponse");

        let wanted = MapResponse {
            success: true,
            links: [
                "https://example.com/page1",
                "https://example.com/page2",
                "https://example.com/page3",
            ]
            .iter()
            .map(|link| link.to_string())
            .collect(),
        };
        assert_eq!(parsed, wanted);
    }
}