use super::spec::{
ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec,
optional_u64, required_str,
};
use async_trait::async_trait;
use regex::Regex;
use serde::Serialize;
use serde_json::{Value, json};
use std::sync::OnceLock;
use std::time::Duration;
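// Regexes compiled once and reused for scraping DuckDuckGo's HTML results page.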
static TITLE_RE: OnceLock<Regex> = OnceLock::new();
static SNIPPET_RE: OnceLock<Regex> = OnceLock::new();
static TAG_RE: OnceLock<Regex> = OnceLock::new();
fn get_title_re() -> &'static Regex {
TITLE_RE.get_or_init(|| {
Regex::new(r#"<a[^>]*class=\"result__a\"[^>]*href=\"([^\"]+)\"[^>]*>(.*?)</a>"#)
.expect("title regex pattern is valid")
})
}
fn get_snippet_re() -> &'static Regex {
SNIPPET_RE.get_or_init(|| {
Regex::new(
r#"<a[^>]*class=\"result__snippet\"[^>]*>(.*?)</a>|<div[^>]*class=\"result__snippet\"[^>]*>(.*?)</div>"#,
)
.expect("snippet regex pattern is valid")
})
}
fn get_tag_re() -> &'static Regex {
TAG_RE.get_or_init(|| Regex::new(r"<[^>]+>").expect("tag regex pattern is valid"))
}
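// Result-count and timeout defaults/caps; the input schema below documents the same limits.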
const DEFAULT_MAX_RESULTS: usize = 5;
const MAX_RESULTS: usize = 10;
const DEFAULT_TIMEOUT_MS: u64 = 15_000;
const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15";
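/// A single search hit extracted from the results page.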
#[derive(Debug, Clone, Serialize)]
struct WebSearchEntry {
title: String,
url: String,
snippet: Option<String>,
}
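/// Serialized payload returned to the caller as the tool result.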
#[derive(Debug, Clone, Serialize)]
struct WebSearchResponse {
query: String,
source: String,
count: usize,
message: String,
results: Vec<WebSearchEntry>,
}
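/// Compatibility search tool that scrapes DuckDuckGo's HTML endpoint rather than calling a search API.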
pub struct WebSearchTool;
#[async_trait]
impl ToolSpec for WebSearchTool {
fn name(&self) -> &'static str {
"web_search"
}
fn description(&self) -> &'static str {
"Compatibility web search helper. Prefer web.run for canonical browsing workflows."
}
fn input_schema(&self) -> Value {
json!({
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search query"
},
"max_results": {
"type": "integer",
"description": "Maximum number of results to return (default: 5, max: 10)"
},
"timeout_ms": {
"type": "integer",
"description": "Timeout in milliseconds (default: 15000, max: 60000)"
}
},
"required": ["query"]
})
}
fn capabilities(&self) -> Vec<ToolCapability> {
vec![ToolCapability::ReadOnly, ToolCapability::Network]
}
fn approval_requirement(&self) -> ApprovalRequirement {
ApprovalRequirement::Auto
}
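// One GET against DuckDuckGo's HTML endpoint per call; no retries or pagination.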
async fn execute(&self, input: Value, _context: &ToolContext) -> Result<ToolResult, ToolError> {
let query = required_str(&input, "query")?.trim().to_string();
if query.is_empty() {
return Err(ToolError::invalid_input("Query cannot be empty"));
}
let max_results = usize::try_from(optional_u64(
&input,
"max_results",
DEFAULT_MAX_RESULTS as u64,
))
.unwrap_or(DEFAULT_MAX_RESULTS)
.clamp(1, MAX_RESULTS);
let timeout_ms = optional_u64(&input, "timeout_ms", DEFAULT_TIMEOUT_MS).min(60_000);
let client = reqwest::Client::builder()
.timeout(Duration::from_millis(timeout_ms))
.user_agent(USER_AGENT)
.build()
.map_err(|e| {
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
})?;
let encoded = url_encode(&query);
let url = format!("https://html.duckduckgo.com/html/?q={encoded}");
let resp = client
.get(&url)
.header(
"Accept",
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
)
.header("Accept-Language", "en-US,en;q=0.5")
.send()
.await
.map_err(|e| ToolError::execution_failed(format!("Web search request failed: {e}")))?;
let status = resp.status();
let body = resp
.text()
.await
.map_err(|e| ToolError::execution_failed(format!("Failed to read response: {e}")))?;
if !status.is_success() {
return Err(ToolError::execution_failed(format!(
"Web search failed: HTTP {}",
status.as_u16()
)));
}
let results = parse_duckduckgo_results(&body, max_results);
let message = if results.is_empty() {
"No results found".to_string()
} else {
format!("Found {} result(s)", results.len())
};
let response = WebSearchResponse {
query,
source: "duckduckgo".to_string(),
count: results.len(),
message,
results,
};
ToolResult::json(&response).map_err(|e| ToolError::execution_failed(e.to_string()))
}
}
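/// Extracts result titles, URLs, and snippets using the regexes above.
/// Snippets are paired with titles by index, so a result without a snippet can shift later pairings.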
fn parse_duckduckgo_results(html: &str, max_results: usize) -> Vec<WebSearchEntry> {
let title_re = get_title_re();
let snippet_re = get_snippet_re();
let snippets: Vec<String> = snippet_re
.captures_iter(html)
.filter_map(|cap| cap.get(1).or_else(|| cap.get(2)))
.map(|m| normalize_text(m.as_str()))
.collect();
let mut results = Vec::new();
for (idx, cap) in title_re.captures_iter(html).enumerate() {
if results.len() >= max_results {
break;
}
let href = cap.get(1).map(|m| m.as_str()).unwrap_or("");
let title_raw = cap.get(2).map(|m| m.as_str()).unwrap_or("");
let title = normalize_text(title_raw);
if title.is_empty() {
continue;
}
let url = normalize_url(href);
let snippet = snippets
.get(idx)
.map(|s| s.to_string())
.filter(|s| !s.is_empty());
results.push(WebSearchEntry {
title,
url,
snippet,
});
}
results
}
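/// Unwraps DuckDuckGo redirect links (the percent-encoded `uddg` parameter) and
/// resolves protocol-relative or root-relative hrefs to absolute URLs.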
fn normalize_url(href: &str) -> String {
if let Some(uddg) = extract_query_param(href, "uddg") {
let decoded = percent_decode(&uddg);
if !decoded.is_empty() {
return decoded;
}
}
if href.starts_with("//") {
return format!("https:{href}");
}
if href.starts_with('/') {
return format!("https://duckduckgo.com{href}");
}
href.to_string()
}
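/// Strips tags, decodes common HTML entities, and collapses whitespace runs.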
fn normalize_text(text: &str) -> String {
let stripped = strip_html_tags(text);
let decoded = decode_html_entities(&stripped);
decoded.split_whitespace().collect::<Vec<_>>().join(" ")
}
fn strip_html_tags(text: &str) -> String {
get_tag_re().replace_all(text, "").to_string()
}
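/// Decodes the small set of entities DuckDuckGo commonly emits; not a general-purpose HTML entity decoder.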
fn decode_html_entities(text: &str) -> String {
text.replace("&", "&")
.replace(""", "\"")
.replace("'", "'")
.replace("'", "'")
.replace("<", "<")
.replace(">", ">")
.replace(" ", " ")
}
fn url_encode(input: &str) -> String {
crate::utils::url_encode(input)
}
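/// Minimal percent-decoder for the redirect target; also treats '+' as a space.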
fn percent_decode(input: &str) -> String {
let bytes = input.as_bytes();
let mut out = Vec::new();
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b'%' if i + 2 < bytes.len() => {
// Use get() so a hex slice that lands inside a multi-byte character
// cannot panic; fall back to emitting the literal '%' byte.
if let Some(hex) = input.get(i + 1..i + 3) {
if let Ok(val) = u8::from_str_radix(hex, 16) {
out.push(val);
i += 3;
continue;
}
}
out.push(bytes[i]);
}
b'+' => out.push(b' '),
_ => out.push(bytes[i]),
}
i += 1;
}
String::from_utf8_lossy(&out).to_string()
}
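/// Returns the raw (still percent-encoded) value of `key` from the URL's query string, if present.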
fn extract_query_param(url: &str, key: &str) -> Option<String> {
let query = url.split_once('?')?.1;
for part in query.split('&') {
let mut iter = part.splitn(2, '=');
let name = iter.next().unwrap_or("");
if name == key {
return iter.next().map(str::to_string);
}
}
None
}
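// Sketch of offline unit tests for the pure helpers above. The HTML fragment is a
// hand-written approximation of DuckDuckGo's result markup, not a captured response,
// so treat the parsing assertions as illustrative rather than a contract with the live page.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn percent_decode_handles_escapes_and_plus() {
        assert_eq!(percent_decode("hello%20world"), "hello world");
        assert_eq!(percent_decode("a+b"), "a b");
    }

    #[test]
    fn normalize_url_unwraps_uddg_redirects() {
        let href = "//duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2F&rut=abc";
        assert_eq!(normalize_url(href), "https://example.com/");
    }

    #[test]
    fn parse_extracts_title_url_and_snippet() {
        let html = r#"<a class="result__a" href="https://example.com">Example <b>Domain</b></a>
<a class="result__snippet">An example snippet</a>"#;
        let results = parse_duckduckgo_results(html, 5);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].title, "Example Domain");
        assert_eq!(results[0].url, "https://example.com");
        assert_eq!(results[0].snippet.as_deref(), Some("An example snippet"));
    }
}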