Skip to main content

zeroclaw/tools/
text_browser.rs

1use super::traits::{Tool, ToolResult};
2use crate::security::SecurityPolicy;
3use async_trait::async_trait;
4use serde_json::json;
5use std::sync::Arc;
6use std::time::Duration;
7
8/// Text browser tool: renders web pages as plain text using text-based browsers
9/// (lynx, links, w3m). Ideal for headless/SSH environments where graphical
10/// browsers are unavailable.
11pub struct TextBrowserTool {
12    security: Arc<SecurityPolicy>,
13    preferred_browser: Option<String>,
14    timeout_secs: u64,
15    max_response_size: usize,
16}
17
/// Names of the text browsers this tool can drive.
///
/// Order matters: auto-detection walks the list front to back and picks the
/// first browser found.
const SUPPORTED_BROWSERS: &'static [&'static str] = &[
    "lynx",
    "links",
    "w3m",
];
20
21impl TextBrowserTool {
22    pub fn new(
23        security: Arc<SecurityPolicy>,
24        preferred_browser: Option<String>,
25        timeout_secs: u64,
26    ) -> Self {
27        Self {
28            security,
29            preferred_browser,
30            timeout_secs,
31            max_response_size: 500_000, // 500KB, consistent with web_fetch
32        }
33    }
34
35    fn validate_url(url: &str) -> anyhow::Result<String> {
36        let url = url.trim();
37
38        if url.is_empty() {
39            anyhow::bail!("URL cannot be empty");
40        }
41
42        if url.chars().any(char::is_whitespace) {
43            anyhow::bail!("URL cannot contain whitespace");
44        }
45
46        if !url.starts_with("http://") && !url.starts_with("https://") {
47            anyhow::bail!("Only http:// and https:// URLs are allowed");
48        }
49
50        Ok(url.to_string())
51    }
52
53    fn truncate_response(&self, text: &str) -> String {
54        if text.len() > self.max_response_size {
55            let mut truncated = text
56                .chars()
57                .take(self.max_response_size)
58                .collect::<String>();
59            truncated.push_str("\n\n... [Response truncated due to size limit] ...");
60            truncated
61        } else {
62            text.to_string()
63        }
64    }
65
66    /// Detect which text browser is available on the system.
67    async fn detect_browser() -> Option<String> {
68        for browser in SUPPORTED_BROWSERS {
69            if let Ok(output) = tokio::process::Command::new("which")
70                .arg(browser)
71                .output()
72                .await
73            {
74                if output.status.success() {
75                    return Some((*browser).to_string());
76                }
77            }
78        }
79        None
80    }
81
82    /// Resolve which browser to use: prefer configured, then auto-detect.
83    async fn resolve_browser(&self, requested: Option<&str>) -> anyhow::Result<String> {
84        // If the caller explicitly requested a browser via the tool parameter, use it.
85        if let Some(browser) = requested {
86            let browser = browser.trim().to_lowercase();
87            if !SUPPORTED_BROWSERS.contains(&browser.as_str()) {
88                anyhow::bail!(
89                    "Unsupported text browser '{browser}'. Supported: {}",
90                    SUPPORTED_BROWSERS.join(", ")
91                );
92            }
93            // Verify it's installed
94            let installed = tokio::process::Command::new("which")
95                .arg(&browser)
96                .output()
97                .await
98                .map(|o| o.status.success())
99                .unwrap_or(false);
100            if !installed {
101                anyhow::bail!("Requested text browser '{browser}' is not installed");
102            }
103            return Ok(browser);
104        }
105
106        // If a preferred browser is set in config, try it first.
107        if let Some(ref preferred) = self.preferred_browser {
108            let preferred = preferred.trim().to_lowercase();
109            if SUPPORTED_BROWSERS.contains(&preferred.as_str()) {
110                let installed = tokio::process::Command::new("which")
111                    .arg(&preferred)
112                    .output()
113                    .await
114                    .map(|o| o.status.success())
115                    .unwrap_or(false);
116                if installed {
117                    return Ok(preferred);
118                }
119                tracing::warn!(
120                    "Configured preferred text browser '{preferred}' is not installed, falling back to auto-detect"
121                );
122            }
123        }
124
125        // Auto-detect
126        Self::detect_browser().await.ok_or_else(|| {
127            anyhow::anyhow!(
128                "No text browser found. Install one of: {}",
129                SUPPORTED_BROWSERS.join(", ")
130            )
131        })
132    }
133
134    /// Build the command arguments for the selected browser with `-dump` flag.
135    fn build_dump_args(_browser: &str, url: &str) -> Vec<String> {
136        // All supported browsers (lynx, links, w3m) use the same `-dump` flag
137        vec!["-dump".to_string(), url.to_string()]
138    }
139}
140
141#[async_trait]
142impl Tool for TextBrowserTool {
143    fn name(&self) -> &str {
144        "text_browser"
145    }
146
147    fn description(&self) -> &str {
148        "Render a web page as plain text using a text-based browser (lynx, links, or w3m). \
149         Ideal for headless/SSH environments without a graphical browser. \
150         Auto-detects available browser or uses a configured preference."
151    }
152
153    fn parameters_schema(&self) -> serde_json::Value {
154        json!({
155            "type": "object",
156            "properties": {
157                "url": {
158                    "type": "string",
159                    "description": "The HTTP or HTTPS URL to render as plain text"
160                },
161                "browser": {
162                    "type": "string",
163                    "description": "Text browser to use: \"lynx\", \"links\", or \"w3m\". If omitted, auto-detects an available browser.",
164                    "enum": ["lynx", "links", "w3m"]
165                }
166            },
167            "required": ["url"]
168        })
169    }
170
171    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
172        let url = args
173            .get("url")
174            .and_then(|v| v.as_str())
175            .ok_or_else(|| anyhow::anyhow!("Missing 'url' parameter"))?;
176
177        if !self.security.can_act() {
178            return Ok(ToolResult {
179                success: false,
180                output: String::new(),
181                error: Some("Action blocked: autonomy is read-only".into()),
182            });
183        }
184
185        if !self.security.record_action() {
186            return Ok(ToolResult {
187                success: false,
188                output: String::new(),
189                error: Some("Action blocked: rate limit exceeded".into()),
190            });
191        }
192
193        let url = match Self::validate_url(url) {
194            Ok(v) => v,
195            Err(e) => {
196                return Ok(ToolResult {
197                    success: false,
198                    output: String::new(),
199                    error: Some(e.to_string()),
200                });
201            }
202        };
203
204        let requested_browser = args.get("browser").and_then(|v| v.as_str());
205
206        let browser = match self.resolve_browser(requested_browser).await {
207            Ok(b) => b,
208            Err(e) => {
209                return Ok(ToolResult {
210                    success: false,
211                    output: String::new(),
212                    error: Some(e.to_string()),
213                });
214            }
215        };
216
217        let dump_args = Self::build_dump_args(&browser, &url);
218
219        let timeout = Duration::from_secs(if self.timeout_secs == 0 {
220            tracing::warn!("text_browser: timeout_secs is 0, using safe default of 30s");
221            30
222        } else {
223            self.timeout_secs
224        });
225
226        let result = tokio::time::timeout(
227            timeout,
228            tokio::process::Command::new(&browser)
229                .args(&dump_args)
230                .output(),
231        )
232        .await;
233
234        match result {
235            Ok(Ok(output)) => {
236                if output.status.success() {
237                    let text = String::from_utf8_lossy(&output.stdout).into_owned();
238                    let text = self.truncate_response(&text);
239                    Ok(ToolResult {
240                        success: true,
241                        output: text,
242                        error: None,
243                    })
244                } else {
245                    let stderr = String::from_utf8_lossy(&output.stderr);
246                    Ok(ToolResult {
247                        success: false,
248                        output: String::new(),
249                        error: Some(format!(
250                            "{browser} exited with status {}: {}",
251                            output.status,
252                            stderr.trim()
253                        )),
254                    })
255                }
256            }
257            Ok(Err(e)) => Ok(ToolResult {
258                success: false,
259                output: String::new(),
260                error: Some(format!("Failed to execute {browser}: {e}")),
261            }),
262            Err(_) => Ok(ToolResult {
263                success: false,
264                output: String::new(),
265                error: Some(format!(
266                    "{browser} timed out after {} seconds",
267                    timeout.as_secs()
268                )),
269            }),
270        }
271    }
272}
273
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::{AutonomyLevel, SecurityPolicy};

    /// URL used by tests that only care about argument plumbing.
    const EXAMPLE_URL: &str = "https://example.com";

    /// Wrap `policy` in a tool with no preferred browser and a 30s timeout.
    fn tool_with(policy: SecurityPolicy) -> TextBrowserTool {
        TextBrowserTool::new(Arc::new(policy), None, 30)
    }

    /// Tool running under a supervised policy, the default for these tests.
    fn test_tool() -> TextBrowserTool {
        tool_with(SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            ..SecurityPolicy::default()
        })
    }

    /// Names listed under `required` in the tool's parameter schema.
    fn required_params(schema: &serde_json::Value) -> Vec<&str> {
        schema["required"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|entry| entry.as_str())
            .collect()
    }

    /// Error message produced when `url` fails validation.
    fn rejection_message(url: &str) -> String {
        TextBrowserTool::validate_url(url).unwrap_err().to_string()
    }

    /// Execute the tool against `EXAMPLE_URL` under the given policy.
    async fn execute_with(policy: SecurityPolicy) -> ToolResult {
        tool_with(policy)
            .execute(json!({ "url": EXAMPLE_URL }))
            .await
            .unwrap()
    }

    #[test]
    fn name_is_text_browser() {
        assert_eq!(test_tool().name(), "text_browser");
    }

    #[test]
    fn parameters_schema_requires_url() {
        let schema = test_tool().parameters_schema();
        assert!(schema["properties"]["url"].is_object());
        assert!(required_params(&schema).contains(&"url"));
    }

    #[test]
    fn parameters_schema_has_optional_browser() {
        let schema = test_tool().parameters_schema();
        assert!(schema["properties"]["browser"].is_object());
        assert!(!required_params(&schema).contains(&"browser"));
    }

    #[test]
    fn validate_url_accepts_http() {
        let input = "http://example.com/page";
        assert_eq!(TextBrowserTool::validate_url(input).unwrap(), input);
    }

    #[test]
    fn validate_url_accepts_https() {
        let input = "https://example.com/page";
        assert_eq!(TextBrowserTool::validate_url(input).unwrap(), input);
    }

    #[test]
    fn validate_url_rejects_empty() {
        assert!(rejection_message("").contains("empty"));
    }

    #[test]
    fn validate_url_rejects_ftp() {
        let message = rejection_message("ftp://example.com");
        assert!(message.contains("http://") || message.contains("https://"));
    }

    #[test]
    fn validate_url_rejects_whitespace() {
        let message = rejection_message("https://example.com/hello world");
        assert!(message.contains("whitespace"));
    }

    #[test]
    fn truncate_within_limit() {
        let short = "hello world";
        assert_eq!(test_tool().truncate_response(short), "hello world");
    }

    #[test]
    fn truncate_over_limit() {
        let mut tool = tool_with(SecurityPolicy::default());
        tool.max_response_size = 10;
        let shortened = tool.truncate_response("hello world this is long");
        assert!(shortened.contains("[Response truncated"));
    }

    #[test]
    fn build_dump_args_lynx() {
        let args = TextBrowserTool::build_dump_args("lynx", EXAMPLE_URL);
        assert_eq!(args, vec!["-dump", "https://example.com"]);
    }

    #[test]
    fn build_dump_args_links() {
        let args = TextBrowserTool::build_dump_args("links", EXAMPLE_URL);
        assert_eq!(args, vec!["-dump", "https://example.com"]);
    }

    #[test]
    fn build_dump_args_w3m() {
        let args = TextBrowserTool::build_dump_args("w3m", EXAMPLE_URL);
        assert_eq!(args, vec!["-dump", "https://example.com"]);
    }

    #[tokio::test]
    async fn blocks_readonly_mode() {
        let outcome = execute_with(SecurityPolicy {
            autonomy: AutonomyLevel::ReadOnly,
            ..SecurityPolicy::default()
        })
        .await;
        assert!(!outcome.success);
        assert!(outcome.error.unwrap().contains("read-only"));
    }

    #[tokio::test]
    async fn blocks_rate_limited() {
        let outcome = execute_with(SecurityPolicy {
            max_actions_per_hour: 0,
            ..SecurityPolicy::default()
        })
        .await;
        assert!(!outcome.success);
        assert!(outcome.error.unwrap().contains("rate limit"));
    }
}