Skip to main content

hematite/tools/
research.rs

1use lazy_static::lazy_static;
2use reqwest::header::USER_AGENT;
3use serde_json::Value;
4use std::sync::Mutex;
5use std::time::Duration;
6use std::time::Instant;
7
lazy_static! {
    /// Timestamp of the most recent search call; `None` until the first search runs.
    ///
    /// `perform_search` reads and updates this value to keep a minimum gap between
    /// search calls (currently 3 seconds) to prevent local IP blocking.
    static ref LAST_SEARCH_CALL: Mutex<Option<Instant>> = Mutex::new(None);
}
12
13/// tool: research_web
14///
15/// Perform a zero-cost technical search using DuckDuckGo Lite.
16/// Returns snippets and titles from technical search results.
17pub async fn execute_search(args: &Value) -> Result<String, String> {
18    let query = args
19        .get("query")
20        .and_then(|v| v.as_str())
21        .ok_or_else(|| "Missing required argument: 'query'".to_string())?;
22
23    // 1. First Attempt: Original Query via Jina Proxy
24    let results = perform_search(query).await?;
25    if !results.is_empty() {
26        return Ok(results);
27    }
28
29    // 2. Fallback: Simplified Query if needed
30    let tier2 = query
31        .replace("2024", "")
32        .replace("2025", "")
33        .replace("2026", "")
34        .replace("crate", "")
35        .split_whitespace()
36        .collect::<Vec<_>>()
37        .join(" ");
38
39    if tier2 != query {
40        let second_results = perform_search(&tier2).await?;
41        if !second_results.is_empty() {
42            return Ok(second_results);
43        }
44    }
45
46    Ok(
47        "No search results found. All web content was safely sanitized. Try a broader search term."
48            .to_string(),
49    )
50}
51
/// Proactively strip JSON-like structures and tool-call patterns from web content.
/// This prevents 'Prompt Injection' where a website tries to trick the agent into running commands.
///
/// Performed in a single pass over `text`:
/// - `{` and `[` become `" ("`; `}` and `]` become `") "`.
/// - `"` becomes `'`.
/// - every occurrence of `<script`, in any ASCII letter case, becomes `[BLOCKED SCRIPT]`.
///
/// All other characters are copied through unchanged.
fn sanitize_web_content(text: &str) -> String {
    const SCRIPT_TAG: &[u8] = b"<script";
    const SCRIPT_MARKER: &str = "[BLOCKED SCRIPT]";

    let bytes = text.as_bytes();
    let mut sanitized = String::with_capacity(text.len());
    // Byte offset up to which input has already been consumed by a tag match.
    let mut skip_until = 0;

    for (pos, ch) in text.char_indices() {
        if pos < skip_until {
            continue;
        }
        match ch {
            '{' | '[' => sanitized.push_str(" ("),
            '}' | ']' => sanitized.push_str(") "),
            '"' => sanitized.push('\''),
            // The tag is pure ASCII, so a byte-wise match also guarantees that
            // `skip_until` lands on a character boundary.
            '<' if bytes
                .get(pos..pos + SCRIPT_TAG.len())
                .map_or(false, |window| window.eq_ignore_ascii_case(SCRIPT_TAG)) =>
            {
                sanitized.push_str(SCRIPT_MARKER);
                skip_until = pos + SCRIPT_TAG.len();
            }
            other => sanitized.push(other),
        }
    }

    sanitized
}
62
63async fn perform_search(query: &str) -> Result<String, String> {
64    // 1. Respect Rate Limits (even for proxy, to be a good citizen)
65    let sleep_duration = {
66        if let Ok(last_call) = LAST_SEARCH_CALL.lock() {
67            last_call.and_then(|instant| {
68                let elapsed = instant.elapsed();
69                if elapsed < Duration::from_secs(3) {
70                    Some(Duration::from_secs(3) - elapsed)
71                } else {
72                    None
73                }
74            })
75        } else {
76            None
77        }
78    };
79    if let Some(duration) = sleep_duration {
80        tokio::time::sleep(duration).await;
81    }
82    if let Ok(mut last_call) = LAST_SEARCH_CALL.lock() {
83        *last_call = Some(Instant::now());
84    }
85
86    // 2. Construct Jina Search Proxy URL
87    // s.jina.ai converts search results into clean markdown for agents.
88    let encoded = percent_encoding::utf8_percent_encode(query, percent_encoding::NON_ALPHANUMERIC);
89    let search_url = format!("https://s.jina.ai/{}", encoded);
90
91    let client = reqwest::Client::builder()
92        .timeout(Duration::from_secs(20))
93        .build()
94        .map_err(|e| format!("Failed to build client: {e}"))?;
95
96    let mut request = client.get(&search_url)
97        .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
98
99    // 2.5 Optional: Inject Jina API Key if available in environment
100    if let Ok(key) = std::env::var("JINA_API_KEY") {
101        request = request.header("Authorization", format!("Bearer {}", key));
102    }
103
104    let response = request
105        .send()
106        .await
107        .map_err(|e| format!("Failed to connect to search proxy: {e}"))?;
108
109    let markdown = response
110        .text()
111        .await
112        .map_err(|e| format!("Failed to read search response: {e}"))?;
113
114    // 3. Safety First: Sanitize all content before the agent reads it.
115    Ok(sanitize_web_content(&markdown))
116}
117
118/// tool: fetch_docs
119///
120/// Fetch any URL and convert it into clean, agent-ready Markdown using the Jina Reader proxy.
121/// This prevents local IP blocking and ensures structured context for documentation.
122pub async fn execute_fetch(args: &Value) -> Result<String, String> {
123    let url = args
124        .get("url")
125        .and_then(|v| v.as_str())
126        .ok_or_else(|| "Missing required argument: 'url'".to_string())?;
127
128    // Prefix with Jina Reader - it handles the rendering and markdown conversion for us.
129    let proxy_url = format!("https://r.jina.ai/{}", url);
130
131    let client = reqwest::Client::builder()
132        .timeout(Duration::from_secs(25))
133        .build()
134        .map_err(|e| format!("Failed to build client: {e}"))?;
135
136    let mut request = client.get(&proxy_url)
137        .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
138
139    // 2.5 Optional: Inject Jina API Key if available in environment
140    if let Ok(key) = std::env::var("JINA_API_KEY") {
141        request = request.header("Authorization", format!("Bearer {}", key));
142    }
143
144    let response = request
145        .send()
146        .await
147        .map_err(|e| format!("Failed to connect to documentation proxy: {e}"))?;
148
149    let markdown = response
150        .text()
151        .await
152        .map_err(|e| format!("Failed to read documentation body: {e}"))?;
153
154    Ok(markdown)
155}