hematite/tools/
research.rs1use lazy_static::lazy_static;
2use reqwest::header::USER_AGENT;
3use serde_json::Value;
4use std::fmt::Write as _;
5use std::sync::Mutex;
6use std::time::Duration;
7use std::time::Instant;
8
9lazy_static! {
10 static ref LAST_SEARCH_CALL: Mutex<Option<Instant>> = Mutex::new(None);
12}
13
14pub async fn execute_search(args: &Value, searx_url: Option<String>) -> Result<String, String> {
19 let query = args
20 .get("query")
21 .and_then(|v| v.as_str())
22 .ok_or_else(|| "Missing required argument: 'query'".to_string())?;
23
24 let results = perform_search(query, searx_url.as_deref()).await?;
26 if !results.is_empty() && !results.contains("No search results found") {
27 return Ok(results);
28 }
29
30 let tier2 = query
32 .replace("2024", "")
33 .replace("2025", "")
34 .replace("2026", "")
35 .replace("crate", "")
36 .split_whitespace()
37 .fold(String::new(), |mut s, w| {
38 if !s.is_empty() {
39 s.push(' ');
40 }
41 s.push_str(w);
42 s
43 });
44
45 if tier2 != query {
46 let second_results = perform_search(&tier2, searx_url.as_deref()).await?;
47 if !second_results.is_empty() && !second_results.contains("No search results found") {
48 return Ok(second_results);
49 }
50 }
51
52 Ok(
53 "No search results found. All web content was safely sanitized. Try a broader search term."
54 .to_string(),
55 )
56}
57
/// Rewrites potentially dangerous fragments of web text before it is returned to the caller.
///
/// - `{` becomes ` (`, `}` becomes `) ` and `"` becomes `'`.
/// - `<script`, `<iframe` and `javascript:` are replaced with the inert markers
///   `[BLOCKED SCRIPT]`, `[BLOCKED IFRAME]` and `blocked-js:`.
///
/// The three markers are matched ASCII-case-insensitively, so spellings such as
/// `<SCRIPT` or `JavaScript:` are blocked as well. All other text is left untouched.
fn sanitize_web_content(text: &str) -> String {
    let mut cleaned = text.replace('{', " (").replace('}', ") ").replace('"', "'");

    for (needle, marker) in [
        ("<script", "[BLOCKED SCRIPT]"),
        ("<iframe", "[BLOCKED IFRAME]"),
        ("javascript:", "blocked-js:"),
    ] {
        cleaned = replace_ascii_case_insensitive(&cleaned, needle, marker);
    }

    cleaned
}

/// Replaces every ASCII-case-insensitive occurrence of `needle` (which must be lowercase
/// ASCII) in `haystack` with `replacement`.
fn replace_ascii_case_insensitive(haystack: &str, needle: &str, replacement: &str) -> String {
    // ASCII lowercasing never changes byte lengths, so byte offsets found in the lowered
    // copy are valid offsets (and char boundaries) in the original string.
    let lowered = haystack.to_ascii_lowercase();
    let mut out = String::with_capacity(haystack.len());
    let mut cursor = 0;

    for (start, _) in lowered.match_indices(needle) {
        out.push_str(&haystack[cursor..start]);
        out.push_str(replacement);
        cursor = start + needle.len();
    }

    out.push_str(&haystack[cursor..]);
    out
}
69
70async fn perform_search(query: &str, searx_url: Option<&str>) -> Result<String, String> {
71 let effective_url = searx_url.unwrap_or("http://localhost:8080");
73
74 match perform_searx_search(query, effective_url).await {
75 Ok(results) if !results.is_empty() => return Ok(results),
76 _ => {
77 }
80 }
81
82 let sleep_duration = {
84 if let Ok(last_call) = LAST_SEARCH_CALL.lock() {
85 last_call.and_then(|instant| {
86 let elapsed = instant.elapsed();
87 if elapsed < Duration::from_secs(3) {
88 Some(Duration::from_secs(3) - elapsed)
89 } else {
90 None
91 }
92 })
93 } else {
94 None
95 }
96 };
97 if let Some(duration) = sleep_duration {
98 tokio::time::sleep(duration).await;
99 }
100 if let Ok(mut last_call) = LAST_SEARCH_CALL.lock() {
101 *last_call = Some(Instant::now());
102 }
103
104 let encoded = percent_encoding::utf8_percent_encode(query, percent_encoding::NON_ALPHANUMERIC);
107 let search_url = format!("https://s.jina.ai/{}", encoded);
108
109 let client = reqwest::Client::builder()
110 .timeout(Duration::from_secs(20))
111 .build()
112 .map_err(|e| format!("Failed to build client: {e}"))?;
113
114 let mut request = client.get(&search_url)
115 .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
116
117 if let Ok(key) = std::env::var("JINA_API_KEY") {
119 request = request.header("Authorization", format!("Bearer {}", key));
120 }
121
122 let response = request
123 .send()
124 .await
125 .map_err(|e| format!("Failed to connect to search proxy: {e}"))?;
126
127 let markdown = response
128 .text()
129 .await
130 .map_err(|e| format!("Failed to read search response: {e}"))?;
131
132 if markdown.trim().starts_with("<!doctype html") || markdown.contains("<html") {
134 return Err("Search proxy returned raw HTML (possibly a rate limit or captcha). Falling back to internal reasoning.".into());
135 }
136
137 Ok(format!(
138 "[Source: Jina Search Proxy]\n\n{}",
139 sanitize_web_content(&markdown)
140 ))
141}
142
143async fn perform_searx_search(query: &str, base_url: &str) -> Result<String, String> {
144 let client = reqwest::Client::builder()
145 .timeout(Duration::from_secs(5))
146 .build()
147 .map_err(|e| format!("Failed to build SearXNG client: {e}"))?;
148
149 let base = base_url.trim_end_matches('/');
151 let search_url = format!(
152 "{}/search?q={}&format=json",
153 base,
154 urlencoding::encode(query)
155 );
156
157 let response = client
158 .get(&search_url)
159 .header(USER_AGENT, "Hematite-CLI/0.6.0")
160 .send()
161 .await
162 .map_err(|e| format!("SearXNG connection failed: {e}"))?;
163
164 if !response.status().is_success() {
165 return Err(format!("SearXNG returned error: {}", response.status()));
166 }
167
168 let json: Value = response
169 .json()
170 .await
171 .map_err(|e| format!("Failed to parse SearXNG JSON: {e}"))?;
172
173 let mut output = String::with_capacity(query.len() + 4096);
174 output.push_str("[Source: SearXNG (Local/Auto-Detected)]\n\n");
175 let _ = write!(output, "# Search results for: {}\n\n", query);
176
177 if let Some(results) = json.get("results").and_then(|r| r.as_array()) {
178 for (i, res) in results.iter().take(10).enumerate() {
179 let title = res
180 .get("title")
181 .and_then(|v| v.as_str())
182 .unwrap_or("No Title");
183 let url = res.get("url").and_then(|v| v.as_str()).unwrap_or("#");
184 let content = res.get("content").and_then(|v| v.as_str()).unwrap_or("");
185
186 let _ = write!(
187 output,
188 "### {}. [{}]({})\n{}\n\n",
189 i + 1,
190 title,
191 url,
192 sanitize_web_content(content)
193 );
194 }
195 }
196
197 if output.len() < 50 {
198 return Ok(String::new());
199 }
200
201 Ok(output)
202}
203
204pub async fn execute_fetch(args: &Value) -> Result<String, String> {
209 let url = args
210 .get("url")
211 .and_then(|v| v.as_str())
212 .ok_or_else(|| "Missing required argument: 'url'".to_string())?;
213
214 let proxy_url = format!("https://r.jina.ai/{}", url);
216
217 let client = reqwest::Client::builder()
218 .timeout(Duration::from_secs(25))
219 .build()
220 .map_err(|e| format!("Failed to build client: {e}"))?;
221
222 let mut request = client.get(&proxy_url)
223 .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
224
225 if let Ok(key) = std::env::var("JINA_API_KEY") {
227 request = request.header("Authorization", format!("Bearer {}", key));
228 }
229
230 let response = request
231 .send()
232 .await
233 .map_err(|e| format!("Failed to connect to documentation proxy: {e}"))?;
234
235 let markdown = response
236 .text()
237 .await
238 .map_err(|e| format!("Failed to read documentation body: {e}"))?;
239
240 Ok(markdown)
241}
242
#[cfg(test)]
mod tests {
    use super::sanitize_web_content;

    /// Braces and quotes are rewritten and `<iframe` is blocked, while a Markdown link
    /// in the same text comes through unchanged.
    #[test]
    fn sanitize_web_content_blocks_script_patterns_without_breaking_markdown_links() {
        let cleaned = sanitize_web_content(
            r#"Use {"tool":"shell"} and [Rust](https://www.rust-lang.org) <iframe src="x"></iframe>"#,
        );

        for expected in [
            "('tool':'shell')",
            "[Rust](https://www.rust-lang.org)",
            "[BLOCKED IFRAME]",
        ] {
            assert!(cleaned.contains(expected));
        }
    }
}