use argentor_core::{ArgentorResult, ToolCall, ToolResult};
use argentor_security::Capability;
use argentor_skills::skill::{Skill, SkillDescriptor};
use async_trait::async_trait;
use std::time::Duration;
use tracing::info;
/// Largest response body, in bytes, that the browser skill accepts (5 MiB); bigger pages are
/// rejected with a "Page too large" tool error.
const MAX_PAGE_SIZE: usize = 5 * 1024 * 1024;
/// Skill that fetches a web page over HTTP(S) and extracts its title, text and links.
pub struct BrowserSkill {
// Static metadata (name, description, JSON parameter schema, capabilities) handed out by
// `descriptor()`.
descriptor: SkillDescriptor,
// HTTP client configured once in `new()` and used for every fetch.
client: reqwest::Client,
}
impl BrowserSkill {
pub fn new() -> Self {
#[allow(clippy::expect_used)]
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(30))
.redirect(reqwest::redirect::Policy::limited(10))
.user_agent("Argentor/0.1 (AI Agent Browser)")
.build()
.expect("Failed to create HTTP client for browser — TLS backend unavailable");
Self {
descriptor: SkillDescriptor {
name: "browser".to_string(),
description: "Browse a web page and extract its text content, links, and metadata."
.to_string(),
parameters_schema: serde_json::json!({
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The URL to browse"
},
"extract": {
"type": "string",
"enum": ["text", "links", "all"],
"description": "What to extract: 'text' (default), 'links', or 'all'"
}
},
"required": ["url"]
}),
required_capabilities: vec![Capability::NetworkAccess {
allowed_hosts: vec![],
}],
requires_approval: false,
},
client,
}
}
}
// `Default` simply delegates to `BrowserSkill::new`, so it builds the same client and descriptor.
impl Default for BrowserSkill {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Skill for BrowserSkill {
    /// Returns the descriptor built in `BrowserSkill::new`.
    fn descriptor(&self) -> &SkillDescriptor {
        &self.descriptor
    }

    /// Fetches `arguments["url"]` and returns the extracted page content as a JSON string.
    ///
    /// `arguments["extract"]` selects the payload: `"links"`, `"all"`, or anything else
    /// (including absent) for text only. The JSON always carries `url` (after redirects),
    /// `status` and `title`.
    ///
    /// Every failure — empty/invalid URL, non-http(s) scheme, private host, transport error,
    /// non-2xx status, oversized body — is reported as a `ToolResult::error` wrapped in `Ok`;
    /// this method never returns `Err`.
    async fn execute(&self, call: ToolCall) -> ArgentorResult<ToolResult> {
        // Upper bound (in bytes) on the extracted text included in the result.
        const MAX_TEXT_BYTES: usize = 50_000;

        let url = call.arguments["url"]
            .as_str()
            .unwrap_or_default()
            .to_string();
        if url.is_empty() {
            return Ok(ToolResult::error(&call.id, "Empty URL"));
        }

        let parsed = match reqwest::Url::parse(&url) {
            Ok(u) => u,
            Err(e) => {
                return Ok(ToolResult::error(
                    &call.id,
                    format!("Invalid URL '{url}': {e}"),
                ));
            }
        };
        match parsed.scheme() {
            "http" | "https" => {}
            scheme => {
                return Ok(ToolResult::error(
                    &call.id,
                    format!("Unsupported scheme '{scheme}'. Only http/https."),
                ));
            }
        }
        if let Some(host) = parsed.host_str() {
            if is_private_host(host) {
                return Ok(ToolResult::error(
                    &call.id,
                    format!("Access denied: '{host}' is a private address"),
                ));
            }
        }

        let extract = call.arguments["extract"]
            .as_str()
            .unwrap_or("text")
            .to_string();
        info!(url = %url, extract = %extract, "Browser fetch");

        let mut response = match self.client.get(&url).send().await {
            Ok(r) => r,
            Err(e) => {
                return Ok(ToolResult::error(
                    &call.id,
                    format!("Failed to fetch '{url}': {e}"),
                ));
            }
        };

        // The client follows redirects, so the response may come from a different host than
        // the one validated above. Never hand back content served by a private address.
        if let Some(host) = response.url().host_str() {
            if is_private_host(host) {
                return Ok(ToolResult::error(
                    &call.id,
                    format!("Access denied: redirected to private address '{host}'"),
                ));
            }
        }

        let status = response.status().as_u16();
        let final_url = response.url().to_string();
        if !response.status().is_success() {
            return Ok(ToolResult::error(
                &call.id,
                format!("HTTP {status} from {final_url}"),
            ));
        }

        // Cheap early exit when the server announces an oversized body.
        // (`usize` -> `u64` is a widening cast here; it cannot truncate.)
        if let Some(declared) = response.content_length() {
            if declared > MAX_PAGE_SIZE as u64 {
                return Ok(ToolResult::error(
                    &call.id,
                    format!(
                        "Page too large: {} bytes (max {})",
                        declared, MAX_PAGE_SIZE
                    ),
                ));
            }
        }

        // Stream the body so the size cap actually bounds memory use; buffering everything
        // first would let a hostile server make us allocate an arbitrarily large response.
        let mut body_bytes: Vec<u8> = Vec::new();
        loop {
            match response.chunk().await {
                Ok(Some(chunk)) => {
                    if body_bytes.len() + chunk.len() > MAX_PAGE_SIZE {
                        return Ok(ToolResult::error(
                            &call.id,
                            format!("Page too large: body exceeds max of {} bytes", MAX_PAGE_SIZE),
                        ));
                    }
                    body_bytes.extend_from_slice(&chunk);
                }
                Ok(None) => break,
                Err(e) => {
                    return Ok(ToolResult::error(
                        &call.id,
                        format!("Failed to read body: {e}"),
                    ));
                }
            }
        }

        let html = String::from_utf8_lossy(&body_bytes);
        let title = extract_title(&html);

        // Only run the extractors the caller asked for.
        let result = match extract.as_str() {
            "links" => {
                let links = extract_links(&html, &final_url);
                serde_json::json!({
                    "url": final_url,
                    "status": status,
                    "title": title,
                    "links": links,
                })
            }
            "all" => {
                let text = extract_text(&html);
                let links = extract_links(&html, &final_url);
                serde_json::json!({
                    "url": final_url,
                    "status": status,
                    "title": title,
                    "text": truncate_text(&text, MAX_TEXT_BYTES),
                    "links": links,
                })
            }
            _ => {
                let text = extract_text(&html);
                serde_json::json!({
                    "url": final_url,
                    "status": status,
                    "title": title,
                    "text": truncate_text(&text, MAX_TEXT_BYTES),
                })
            }
        };
        Ok(ToolResult::success(&call.id, result.to_string()))
    }
}
/// Returns the trimmed contents of the first `<title ...>...</title>` element, or an empty
/// string when no complete title element is present. Tag names match case-insensitively.
fn extract_title(html: &str) -> String {
    /// Finds the title text; `lower` must be byte-for-byte aligned with `html`.
    fn locate(html: &str, lower: &str) -> Option<String> {
        let open = lower.find("<title")?;
        let content_start = open + lower[open..].find('>')? + 1;
        let content_len = lower[content_start..].find("</title>")?;
        Some(html[content_start..content_start + content_len].trim().to_string())
    }

    // ASCII-only lowercasing keeps every byte offset identical to `html`. Full Unicode
    // lowercasing can change the byte length of a character, which made the offsets below
    // point at the wrong place in `html` (or panic on a non-boundary / out-of-range slice).
    let lower = html.to_ascii_lowercase();
    locate(html, &lower).unwrap_or_default()
}
/// Converts HTML into plain text.
///
/// * Tags are dropped; each closing `>` leaves a single separating space.
/// * Everything between a script or style opening tag and its matching closing tag is skipped.
/// * The character references amp, lt, gt, quot, nbsp, apos and #39 are decoded (nbsp becomes
///   a regular space); any other ampersand is kept literally.
/// * Whitespace runs are collapsed to one character and the result is trimmed.
fn extract_text(html: &str) -> String {
    // Text that must follow an ampersand to form a supported character reference, paired with
    // the character it decodes to.
    const ENTITY_TAILS: [(&str, char); 7] = [
        ("amp;", '&'),
        ("lt;", '<'),
        ("gt;", '>'),
        ("quot;", '"'),
        ("nbsp;", ' '),
        ("#39;", '\''),
        ("apos;", '\''),
    ];

    /// True when `chars` begins with `prefix`, optionally ignoring ASCII case.
    fn has_prefix(chars: &[char], prefix: &str, ignore_ascii_case: bool) -> bool {
        let mut rest = chars.iter();
        prefix.chars().all(|want| match rest.next() {
            Some(got) if ignore_ascii_case => got.eq_ignore_ascii_case(&want),
            Some(got) => *got == want,
            None => false,
        })
    }

    /// Given the characters following an ampersand, returns how many characters the whole
    /// reference spans (ampersand included) and the decoded character.
    fn decode_entity(after_amp: &[char]) -> Option<(usize, char)> {
        ENTITY_TAILS
            .iter()
            .find(|(tail, _)| has_prefix(after_amp, tail, false))
            .map(|&(tail, decoded)| (1 + tail.len(), decoded))
    }

    let chars: Vec<char> = html.chars().collect();
    let mut result = String::with_capacity(html.len() / 2);
    let mut in_tag = false;
    // While inside script/style content this holds the closing-tag prefix that ends it.
    let mut raw_text_end: Option<&str> = None;

    let mut i = 0;
    while i < chars.len() {
        let ch = chars[i];

        if in_tag {
            if ch == '>' {
                in_tag = false;
                if !result.ends_with(' ') && !result.ends_with('\n') {
                    result.push(' ');
                }
            }
            i += 1;
            continue;
        }

        if let Some(close) = raw_text_end {
            // Inside script/style only the matching closing tag is markup; a stray `<`
            // (e.g. `a < b` in JavaScript) must not be mistaken for the start of a tag.
            if ch == '<' && has_prefix(&chars[i..], close, true) {
                raw_text_end = None;
                in_tag = true;
            }
            i += 1;
            continue;
        }

        if ch == '<' {
            // Tag names are compared on the original characters (ASCII case-insensitively)
            // rather than on a separately lowercased copy, whose indices can drift.
            let rest = &chars[i..];
            if has_prefix(rest, "<script", true) {
                raw_text_end = Some("</script");
            } else if has_prefix(rest, "<style", true) {
                raw_text_end = Some("</style");
            }
            in_tag = true;
            i += 1;
            continue;
        }

        if ch == '&' {
            if let Some((consumed, decoded)) = decode_entity(&chars[i + 1..]) {
                result.push(decoded);
                i += consumed;
                continue;
            }
        }

        result.push(ch);
        i += 1;
    }

    // Collapse whitespace runs; a run that starts with a newline stays a newline.
    let mut cleaned = String::with_capacity(result.len());
    let mut prev_space = false;
    for ch in result.chars() {
        if ch.is_whitespace() {
            if !prev_space {
                cleaned.push(if ch == '\n' { '\n' } else { ' ' });
            }
            prev_space = true;
        } else {
            cleaned.push(ch);
            prev_space = false;
        }
    }
    cleaned.trim().to_string()
}
fn extract_links(html: &str, base_url: &str) -> Vec<serde_json::Value> {
let mut links = Vec::new();
let lower = html.to_lowercase();
let base = reqwest::Url::parse(base_url).ok();
let mut search_from = 0;
while let Some(pos) = lower[search_from..].find("href=") {
let abs_pos = search_from + pos + 5;
search_from = abs_pos;
if abs_pos >= lower.len() {
break;
}
let quote = html.as_bytes().get(abs_pos).copied();
let (start, end_char) = match quote {
Some(b'"') => (abs_pos + 1, '"'),
Some(b'\'') => (abs_pos + 1, '\''),
_ => continue,
};
if let Some(end) = html[start..].find(end_char) {
let href = &html[start..start + end];
if href.starts_with('#') || href.starts_with("javascript:") {
continue;
}
let full_url = if href.starts_with("http://") || href.starts_with("https://") {
href.to_string()
} else if let Some(ref base) = base {
base.join(href)
.map_or_else(|_| href.to_string(), |u| u.to_string())
} else {
href.to_string()
};
let after_href = start + end;
let text = extract_link_text(html, after_href);
links.push(serde_json::json!({
"url": full_url,
"text": text,
}));
}
}
links
}
/// Returns the trimmed text between the first `>` at or after byte offset `from` and the next
/// `<` following it. Yields an empty string when either delimiter is missing or when the text
/// in between is blank (for example when the anchor starts with a nested tag).
fn extract_link_text(html: &str, from: usize) -> String {
    let tail = &html[from..];
    let label = tail
        .find('>')
        .map(|gt| &tail[gt + 1..])
        .and_then(|after_tag| after_tag.find('<').map(|lt| after_tag[..lt].trim()))
        .unwrap_or("");
    label.to_string()
}
/// Returns the longest prefix of `text` that is at most `max_chars` long and ends on a UTF-8
/// character boundary.
///
/// Despite the parameter name, the limit is compared against `text.len()`, i.e. it is a byte
/// count, not a count of characters.
fn truncate_text(text: &str, max_chars: usize) -> &str {
    if text.len() <= max_chars {
        return text;
    }
    // Walk back from the limit to the nearest boundary; offset 0 always qualifies.
    let cut = (0..=max_chars)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    &text[..cut]
}
/// Returns `true` when `host` must not be fetched because it names a loopback, private,
/// link-local, unspecified or cloud-metadata destination.
///
/// `host` is a URL host component: a DNS name, an IPv4 literal, or an IPv6 literal with or
/// without surrounding brackets. Two checks are combined:
///
/// 1. If the host parses as an IP address, it is classified by address range. This covers
///    forms that string matching misses, such as IPv4-mapped IPv6 addresses, `fc00::/7`,
///    `fe80::/10`, `::`, `0.0.0.0/8` and `100.64.0.0/10`.
/// 2. Otherwise-unmatched hosts are compared (case-insensitively) against a list of name
///    prefixes. The numeric prefixes are kept on purpose: they also reject DNS names that
///    merely start with a private-looking address.
///
/// This only inspects the host string; it cannot detect a public name that resolves to a
/// private address.
fn is_private_host(host: &str) -> bool {
    const PRIVATE_PREFIXES: [&str; 25] = [
        "localhost",
        "127.",
        "10.",
        "172.16.",
        "172.17.",
        "172.18.",
        "172.19.",
        "172.20.",
        "172.21.",
        "172.22.",
        "172.23.",
        "172.24.",
        "172.25.",
        "172.26.",
        "172.27.",
        "172.28.",
        "172.29.",
        "172.30.",
        "172.31.",
        "192.168.",
        "169.254.",
        "0.0.0.0",
        "[::1]",
        "metadata.google",
        "metadata.aws",
    ];

    /// Range-based classification of an IPv4 address.
    fn is_internal_v4(ip: std::net::Ipv4Addr) -> bool {
        let [first, second, _, _] = ip.octets();
        ip.is_loopback()
            || ip.is_private()
            || ip.is_link_local()
            || ip.is_unspecified()
            || ip.is_broadcast()
            // 0.0.0.0/8 ("this network").
            || first == 0
            // 100.64.0.0/10 (carrier-grade NAT shared address space).
            || (first == 100 && (second & 0xc0) == 64)
    }

    /// Range-based classification of an IPv6 address, unwrapping IPv4-mapped addresses.
    fn is_internal_v6(ip: std::net::Ipv6Addr) -> bool {
        if let [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, a, b, c, d] = ip.octets() {
            return is_internal_v4(std::net::Ipv4Addr::new(a, b, c, d));
        }
        let first_segment = ip.segments()[0];
        ip.is_loopback()
            || ip.is_unspecified()
            // fc00::/7 (unique local).
            || (first_segment & 0xfe00) == 0xfc00
            // fe80::/10 (link-local unicast).
            || (first_segment & 0xffc0) == 0xfe80
    }

    // URL hosts carry IPv6 literals in brackets; the address parser wants them bare.
    let bare = host
        .strip_prefix('[')
        .and_then(|inner| inner.strip_suffix(']'))
        .unwrap_or(host);
    let ip_is_internal = bare
        .parse::<std::net::IpAddr>()
        .map_or(false, |ip| match ip {
            std::net::IpAddr::V4(v4) => is_internal_v4(v4),
            std::net::IpAddr::V6(v6) => is_internal_v6(v6),
        });
    if ip_is_internal {
        return true;
    }

    let host_lower = host.to_lowercase();
    PRIVATE_PREFIXES
        .iter()
        .any(|prefix| host_lower.starts_with(prefix))
}
// Unit tests for the HTML helpers plus the request-validation paths of `BrowserSkill::execute`
// that return before any network access.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_title() {
        let html = "<html><head><title>My Page</title></head><body></body></html>";
        assert_eq!(extract_title(html), "My Page");
    }

    #[test]
    fn test_extract_title_missing() {
        let html = "<html><body>No title here</body></html>";
        assert_eq!(extract_title(html), "");
    }

    #[test]
    fn test_extract_text_simple() {
        let html = "<html><body><h1>Hello</h1><p>World</p></body></html>";
        let text = extract_text(html);
        assert!(text.contains("Hello"));
        assert!(text.contains("World"));
    }

    #[test]
    fn test_extract_text_strips_script() {
        let html = "<html><body><p>Before</p><script>var x = 1;</script><p>After</p></body></html>";
        let text = extract_text(html);
        assert!(text.contains("Before"));
        assert!(text.contains("After"));
        assert!(!text.contains("var x"));
    }

    #[test]
    fn test_extract_text_strips_style() {
        let html = "<html><body><style>.a{color:red}</style><p>Content</p></body></html>";
        let text = extract_text(html);
        assert!(text.contains("Content"));
        assert!(!text.contains("color"));
    }

    #[test]
    fn test_extract_text_entities() {
        // The input must contain *encoded* character references. '\u{26}' is the ampersand;
        // it is written as an escape so the references survive tools that decode HTML
        // entities found in source text.
        let html = "<p>A \u{26}amp; B \u{26}lt; C \u{26}gt; D \u{26}quot;E\u{26}quot;</p>";
        let text = extract_text(html);
        assert!(text.contains("A & B < C > D \"E\""));
    }

    #[test]
    fn test_extract_links() {
        let html = r#"<a href="https://example.com">Example</a><a href="/about">About</a>"#;
        let links = extract_links(html, "https://mysite.com");
        assert_eq!(links.len(), 2);
        assert_eq!(links[0]["url"], "https://example.com");
        assert_eq!(links[0]["text"], "Example");
        assert_eq!(links[1]["url"], "https://mysite.com/about");
        assert_eq!(links[1]["text"], "About");
    }

    #[test]
    fn test_extract_links_skips_fragments() {
        let html = r##"<a href="#section">Section</a><a href="javascript:void(0)">JS</a>"##;
        let links = extract_links(html, "https://mysite.com");
        assert!(links.is_empty());
    }

    #[test]
    fn test_truncate_text() {
        assert_eq!(truncate_text("hello", 10), "hello");
        assert_eq!(truncate_text("hello world", 5), "hello");
    }

    #[tokio::test]
    async fn test_browser_empty_url() {
        let skill = BrowserSkill::new();
        let call = ToolCall {
            id: "t1".to_string(),
            name: "browser".to_string(),
            arguments: serde_json::json!({"url": ""}),
        };
        let result = skill.execute(call).await.unwrap();
        assert!(result.is_error);
    }

    #[tokio::test]
    async fn test_browser_invalid_scheme() {
        let skill = BrowserSkill::new();
        let call = ToolCall {
            id: "t2".to_string(),
            name: "browser".to_string(),
            arguments: serde_json::json!({"url": "ftp://example.com"}),
        };
        let result = skill.execute(call).await.unwrap();
        assert!(result.is_error);
        assert!(result.content.contains("Unsupported scheme"));
    }

    #[tokio::test]
    async fn test_browser_blocks_ssrf() {
        let skill = BrowserSkill::new();
        let call = ToolCall {
            id: "t3".to_string(),
            name: "browser".to_string(),
            arguments: serde_json::json!({"url": "http://169.254.169.254/latest"}),
        };
        let result = skill.execute(call).await.unwrap();
        assert!(result.is_error);
        assert!(result.content.contains("private"));
    }
}