pub mod read;
pub mod search;
pub mod types;
pub mod youtube;
use async_trait::async_trait;
use reqwest::Client;
use serde_json::json;
use std::sync::OnceLock;
use std::time::Duration;
use super::{truncate_head, truncate_line, Tool, ToolContext, ToolOutput, TruncationResult};
use crate::error::Result;
use types::SearchProvider;
const MAX_OUTPUT_LINES: usize = 2000;
const MAX_OUTPUT_BYTES: usize = 50 * 1024;
const MAX_LINE_CHARS: usize = 500;
fn http_client() -> &'static Client {
static CLIENT: OnceLock<Client> = OnceLock::new();
CLIENT.get_or_init(|| {
Client::builder()
.timeout(Duration::from_secs(30))
.connect_timeout(Duration::from_secs(10))
.pool_idle_timeout(Duration::from_secs(90))
.redirect(reqwest::redirect::Policy::limited(10))
.build()
.expect("failed to build HTTP client")
})
}
pub struct WebTool;
#[async_trait]
impl Tool for WebTool {
fn name(&self) -> &str {
"web"
}
fn label(&self) -> &str {
"Web"
}
fn description(&self) -> &str {
"Search the web or read a page. YouTube URLs are read through native HTTP metadata/transcript extraction."
}
fn parameters(&self) -> serde_json::Value {
json!({
"type": "object",
"properties": {
"action": { "type": "string", "enum": ["search", "read"] },
"query": { "type": "string" },
"url": { "type": "string" },
"provider": { "type": "string", "enum": ["tavily", "exa", "linkup", "perplexity"] },
"maxResults": { "type": "number" }
},
"required": ["action"]
})
}
fn is_readonly(&self) -> bool {
true
}
async fn execute(
&self,
_call_id: &str,
params: serde_json::Value,
ctx: ToolContext,
) -> Result<ToolOutput> {
match params["action"].as_str() {
Some("search") => execute_search(params, &ctx).await,
Some("read") => execute_read(params).await,
Some(other) => Ok(ToolOutput::error(format!("Unknown web action: {other}"))),
None => Ok(ToolOutput::error("Missing 'action' parameter")),
}
}
}
async fn execute_search(params: serde_json::Value, ctx: &ToolContext) -> Result<ToolOutput> {
let query = match params["query"].as_str() {
Some(q) if !q.is_empty() => q,
_ => return Ok(ToolOutput::error("Missing 'query' parameter")),
};
let max_results = params["maxResults"]
.as_u64()
.map(|n| n as usize)
.unwrap_or(5)
.min(20);
let provider = resolve_provider(¶ms, ctx);
let response = match search::search(http_client(), provider, query, max_results).await {
Ok(resp) => resp,
Err(e) => return Ok(ToolOutput::error(e.to_string())),
};
Ok(ToolOutput::text(truncate_output(format_search_response(
&response, query,
))))
}
fn resolve_provider(params: &serde_json::Value, ctx: &ToolContext) -> SearchProvider {
if let Some(name) = params["provider"].as_str() {
match name {
"tavily" => return SearchProvider::Tavily,
"exa" => return SearchProvider::Exa,
"linkup" => return SearchProvider::Linkup,
"perplexity" => return SearchProvider::Perplexity,
_ => {}
}
}
if let Ok(env_provider) = std::env::var("IMP_WEB_PROVIDER") {
match env_provider.to_lowercase().as_str() {
"tavily" => return SearchProvider::Tavily,
"exa" => return SearchProvider::Exa,
"linkup" => return SearchProvider::Linkup,
"perplexity" => return SearchProvider::Perplexity,
_ => {}
}
}
let config_dir = crate::config::Config::user_config_dir();
if let Ok(config) = crate::config::Config::resolve(&config_dir, Some(&ctx.cwd)) {
if let Some(provider) = config.web.search_provider {
return provider;
}
}
for provider in [
SearchProvider::Tavily,
SearchProvider::Exa,
SearchProvider::Linkup,
SearchProvider::Perplexity,
] {
if std::env::var(provider.env_key_name()).is_ok() {
return provider;
}
}
SearchProvider::default()
}
fn format_search_response(response: &types::SearchResponse, query: &str) -> String {
let mut output = format!("Query: \"{}\" ({})\n", query, response.provider.name());
if let Some(answer) = &response.answer {
output.push_str(&format!("\n## Summary\n{answer}\n"));
}
if response.results.is_empty() {
output.push_str("\nNo results found.\n");
return output;
}
output.push_str(&format!(
"\n## Results ({} found)\n",
response.results.len()
));
for result in &response.results {
output.push_str(&format!("\n### {}\n", result.title));
output.push_str(&format!("URL: {}\n", result.url));
if let Some(date) = &result.date {
output.push_str(&format!("Date: {date}\n"));
}
if let Some(snippet) = &result.snippet {
output.push_str(&format!("{snippet}\n"));
}
}
output
}
async fn execute_read(params: serde_json::Value) -> Result<ToolOutput> {
let url = match params["url"].as_str() {
Some(u) if !u.is_empty() => u,
_ => return Ok(ToolOutput::error("Missing 'url' parameter")),
};
let page = match read::fetch_and_extract(http_client(), url).await {
Ok(page) => page,
Err(e) => return Ok(ToolOutput::error(e.to_string())),
};
let title = page.title.as_deref().unwrap_or(url);
let mut output = format!("# {title}\nURL: {}\n", page.url);
if page.was_redirected {
output.push_str(&format!("Requested: {}\n", page.requested_url));
}
output.push_str(&format!("Status: {}\n", page.status_code));
output.push_str(&format!(
"Content-Type: {}\n",
page.content_type.as_deref().unwrap_or("unknown")
));
output.push_str(&format!(
"Format: {} (requested markdown, received {})\n",
page.format_received.name(),
page.format_received.name()
));
output.push_str(&format!(
"Response size: {} bytes → {} chars extracted\n",
page.raw_body_bytes, page.content_length
));
if !page.diagnostics.is_empty() {
output.push_str("\n⚠ Diagnostics:\n");
for warning in &page.diagnostics {
output.push_str(&format!("- {warning}\n"));
}
}
output.push_str("\n---\n\n");
output.push_str("<web_content>\n");
output.push_str(&page.text);
output.push_str("\n</web_content>");
Ok(ToolOutput::text(truncate_output(output)))
}
fn truncate_output(text: String) -> String {
if text.is_empty() {
return text;
}
let truncated_lines = text
.lines()
.map(|line| truncate_line(line, MAX_LINE_CHARS))
.collect::<Vec<_>>()
.join("\n");
let TruncationResult {
content,
truncated,
output_lines,
total_lines,
temp_file,
..
} = truncate_head(&truncated_lines, MAX_OUTPUT_LINES, MAX_OUTPUT_BYTES);
if !truncated {
return content;
}
let mut result = content;
result.push_str(&format!(
"\n[Output truncated: showing first {output_lines} of {total_lines} lines{}]",
temp_file
.as_ref()
.map(|p| format!(". Full output saved to {}", p.display()))
.unwrap_or_default()
));
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn resolve_provider_prefers_env_over_config() {
let dir = tempfile::tempdir().unwrap();
std::fs::create_dir_all(dir.path().join(".imp")).unwrap();
std::fs::write(
dir.path().join(".imp").join("config.toml"),
"[web]\nsearch_provider = \"exa\"\n",
)
.unwrap();
let old = std::env::var("IMP_WEB_PROVIDER").ok();
std::env::set_var("IMP_WEB_PROVIDER", "tavily");
let (tx, _rx) = tokio::sync::mpsc::channel(1);
let (cmd_tx, _cmd_rx) = tokio::sync::mpsc::channel(16);
let ctx = ToolContext {
cwd: dir.path().to_path_buf(),
cancelled: std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)),
update_tx: tx,
command_tx: cmd_tx,
ui: std::sync::Arc::new(crate::ui::NullInterface),
file_cache: std::sync::Arc::new(crate::tools::FileCache::new()),
checkpoint_state: std::sync::Arc::new(crate::tools::CheckpointState::new()),
file_tracker: std::sync::Arc::new(std::sync::Mutex::new(
crate::tools::FileTracker::new(),
)),
anchor_store: std::sync::Arc::new(crate::tools::AnchorStore::new()),
lua_tool_loader: None,
mode: crate::config::AgentMode::Full,
read_max_lines: 500,
turn_mana_review: std::sync::Arc::new(std::sync::Mutex::new(
crate::mana_review::TurnManaReviewAccumulator::default(),
)),
config: std::sync::Arc::new(crate::config::Config::default()),
};
let provider = resolve_provider(&serde_json::json!({}), &ctx);
assert_eq!(provider, SearchProvider::Tavily);
match old {
Some(value) => std::env::set_var("IMP_WEB_PROVIDER", value),
None => std::env::remove_var("IMP_WEB_PROVIDER"),
}
}
#[test]
fn format_search_with_answer() {
let response = types::SearchResponse {
results: vec![types::SearchResult {
title: "Rust Lang".into(),
url: "https://rust-lang.org".into(),
snippet: Some("A systems programming language".into()),
date: None,
}],
answer: Some("Rust is a systems programming language.".into()),
provider: SearchProvider::Tavily,
};
let output = format_search_response(&response, "what is rust");
assert!(output.contains("## Summary"));
assert!(output.contains("Rust is a systems programming language"));
assert!(output.contains("### Rust Lang"));
assert!(output.contains("(tavily)"));
}
#[test]
fn format_search_no_results() {
let response = types::SearchResponse {
results: vec![],
answer: None,
provider: SearchProvider::Exa,
};
let output = format_search_response(&response, "obscure query");
assert!(output.contains("No results found"));
assert!(output.contains("(exa)"));
}
#[test]
fn truncate_output_respects_limits() {
let long_text = (0..5000)
.map(|i| format!("Line {i}"))
.collect::<Vec<_>>()
.join("\n");
let result = truncate_output(long_text);
assert!(result.len() <= MAX_OUTPUT_BYTES + 500); assert!(result.contains("[Output truncated"));
}
}