browser_use/tools/
markdown.rs1use crate::error::{BrowserError, Result};
2use crate::tools::html_to_markdown::convert_html_to_markdown;
3use crate::tools::readability_script::READABILITY_SCRIPT;
4use crate::tools::{Tool, ToolContext, ToolResult};
5use schemars::JsonSchema;
6use serde::{Deserialize, Serialize};
7
8#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
10pub struct GetMarkdownParams {
11 #[serde(default = "default_page")]
13 pub page: usize,
14
15 #[serde(default = "default_page_size")]
17 pub page_size: usize,
18}
19
/// Page returned when the caller does not specify one (pages are 1-based).
fn default_page() -> usize {
    const FIRST_PAGE: usize = 1;
    FIRST_PAGE
}
23
/// Page size used when the caller does not specify one.
fn default_page_size() -> usize {
    const DEFAULT_PAGE_SIZE: usize = 100_000;
    DEFAULT_PAGE_SIZE
}
27
28impl Default for GetMarkdownParams {
29 fn default() -> Self {
30 Self {
31 page: default_page(),
32 page_size: default_page_size(),
33 }
34 }
35}
36
/// Tool that returns the current page's content as paginated markdown.
///
/// The type carries no state; all inputs arrive through
/// `GetMarkdownParams` and the `ToolContext` passed to `execute_typed`.
#[derive(Default)]
pub struct GetMarkdownTool;
39
40impl Tool for GetMarkdownTool {
41 type Params = GetMarkdownParams;
42
43 fn name(&self) -> &str {
44 "get_markdown"
45 }
46
47 fn execute_typed(
48 &self,
49 params: GetMarkdownParams,
50 context: &mut ToolContext,
51 ) -> Result<ToolResult> {
52 std::thread::sleep(std::time::Duration::from_millis(1000));
56
57 let js_code = format!(
61 "var READABILITY_SCRIPT = {};\n{}",
62 serde_json::to_string(READABILITY_SCRIPT).unwrap(),
63 include_str!("convert_to_markdown.js")
64 );
65
66 let result = context
68 .session
69 .tab()?
70 .evaluate(&js_code, false)
71 .map_err(|e| BrowserError::EvaluationFailed(e.to_string()))?;
72
73 let result_value = result.value.ok_or_else(|| {
75 let description = result
77 .description
78 .map(|d| format!("Description: {}", d))
79 .unwrap_or_else(|| format!("Type: {:?}", result.Type));
80
81 BrowserError::ToolExecutionFailed {
82 tool: "get_markdown".to_string(),
83 reason: format!("No value returned from JavaScript. {}", description),
84 }
85 })?;
86
87 let extraction_result: ExtractionResult = if let Some(json_str) = result_value.as_str() {
89 serde_json::from_str(json_str).map_err(|e| BrowserError::ToolExecutionFailed {
90 tool: "get_markdown".to_string(),
91 reason: format!("Failed to parse extraction result: {}", e),
92 })?
93 } else {
94 serde_json::from_value(result_value).map_err(|e| BrowserError::ToolExecutionFailed {
96 tool: "get_markdown".to_string(),
97 reason: format!("Failed to deserialize extraction result: {}", e),
98 })?
99 };
100
101 if extraction_result.readability_failed {
103 return Err(BrowserError::ToolExecutionFailed {
104 tool: "get_markdown".to_string(),
105 reason: extraction_result
106 .error
107 .unwrap_or_else(|| "Readability extraction failed".to_string()),
108 });
109 }
110
111 let full_markdown = convert_html_to_markdown(&extraction_result.content);
113
114 let total_pages = if full_markdown.is_empty() {
116 1
117 } else {
118 (full_markdown.len() + params.page_size - 1) / params.page_size
119 };
120
121 let current_page = params.page.clamp(1, total_pages.max(1));
123
124 let start_idx = (current_page - 1) * params.page_size;
126 let end_idx = (start_idx + params.page_size).min(full_markdown.len());
127
128 let mut page_content = if start_idx < full_markdown.len() {
130 full_markdown[start_idx..end_idx].to_string()
131 } else {
132 String::new()
133 };
134
135 if current_page == 1 && !extraction_result.title.is_empty() {
137 page_content = format!("# {}\n\n{}", extraction_result.title, page_content);
138 }
139
140 if total_pages > 1 {
142 let pagination_info = if current_page < total_pages {
143 format!(
144 "\n\n---\n\n*Page {} of {}. There are {} more page(s) with additional content.*\n",
145 current_page,
146 total_pages,
147 total_pages - current_page
148 )
149 } else {
150 format!(
151 "\n\n---\n\n*Page {} of {}. This is the last page.*\n",
152 current_page, total_pages
153 )
154 };
155 page_content.push_str(&pagination_info);
156 }
157
158 Ok(ToolResult::success_with(serde_json::json!({
160 "markdown": page_content,
161 "title": extraction_result.title,
162 "url": extraction_result.url,
163 "currentPage": current_page,
164 "totalPages": total_pages,
165 "hasMorePages": current_page < total_pages,
166 "length": page_content.len(),
167 "byline": extraction_result.byline,
168 "excerpt": extraction_result.excerpt,
169 "siteName": extraction_result.site_name,
170 })))
171 }
172}
173
174#[derive(Debug, Serialize, Deserialize)]
176#[serde(rename_all = "camelCase")]
177struct ExtractionResult {
178 title: String,
179 content: String,
180 text_content: String,
181 url: String,
182 #[serde(default)]
183 excerpt: String,
184 #[serde(default)]
185 byline: String,
186 #[serde(default)]
187 site_name: String,
188 #[serde(default)]
189 length: usize,
190 #[serde(default)]
191 lang: String,
192 #[serde(default)]
193 dir: String,
194 #[serde(default)]
195 published_time: String,
196 #[serde(default)]
197 readability_failed: bool,
198 #[serde(default)]
199 error: Option<String>,
200}