hematite/tools/
research.rs1use lazy_static::lazy_static;
2use reqwest::header::USER_AGENT;
3use serde_json::Value;
4use std::sync::Mutex;
5use std::time::Duration;
6use std::time::Instant;
7
8lazy_static! {
9 static ref LAST_SEARCH_CALL: Mutex<Option<Instant>> = Mutex::new(None);
11}
12
13pub async fn execute_search(args: &Value, searx_url: Option<String>) -> Result<String, String> {
18 let query = args
19 .get("query")
20 .and_then(|v| v.as_str())
21 .ok_or_else(|| "Missing required argument: 'query'".to_string())?;
22
23 let results = perform_search(query, searx_url.as_deref()).await?;
25 if !results.is_empty() && !results.contains("No search results found") {
26 return Ok(results);
27 }
28
29 let tier2 = query
31 .replace("2024", "")
32 .replace("2025", "")
33 .replace("2026", "")
34 .replace("crate", "")
35 .split_whitespace()
36 .collect::<Vec<_>>()
37 .join(" ");
38
39 if tier2 != query {
40 let second_results = perform_search(&tier2, searx_url.as_deref()).await?;
41 if !second_results.is_empty() && !second_results.contains("No search results found") {
42 return Ok(second_results);
43 }
44 }
45
46 Ok(
47 "No search results found. All web content was safely sanitized. Try a broader search term."
48 .to_string(),
49 )
50}
51
/// Replaces every ASCII-case-insensitive occurrence of `needle` in `haystack`
/// with `replacement`.
///
/// `needle` must be ASCII. Because an ASCII byte can only ever be a complete
/// UTF-8 character, every match starts and ends on a character boundary, so
/// slicing `haystack` at match offsets is always valid.
fn replace_ascii_ignore_case(haystack: &str, needle: &str, replacement: &str) -> String {
    debug_assert!(needle.is_ascii());
    if needle.is_empty() {
        return haystack.to_owned();
    }

    let hay = haystack.as_bytes();
    let pat = needle.as_bytes();
    let mut out = String::with_capacity(haystack.len());
    let mut copied_upto = 0;
    let mut i = 0;

    while i + pat.len() <= hay.len() {
        if hay[i..i + pat.len()].eq_ignore_ascii_case(pat) {
            out.push_str(&haystack[copied_upto..i]);
            out.push_str(replacement);
            i += pat.len();
            copied_upto = i;
        } else {
            i += 1;
        }
    }
    out.push_str(&haystack[copied_upto..]);
    out
}

/// Defangs text fetched from the web before it is returned to the caller.
///
/// * `{` becomes ` (`, `}` becomes `) ` and `"` becomes `'`.
/// * `<script`, `<iframe` and `javascript:` are replaced with inert markers.
///   These three patterns are matched without regard to ASCII case, so
///   variants such as `<SCRIPT` or `JavaScript:` are blocked as well.
///
/// All other text, including markdown links, is passed through unchanged.
fn sanitize_web_content(text: &str) -> String {
    const BLOCKED_PATTERNS: [(&str, &str); 3] = [
        ("<script", "[BLOCKED SCRIPT]"),
        ("<iframe", "[BLOCKED IFRAME]"),
        ("javascript:", "blocked-js:"),
    ];

    let defanged = text
        .replace('{', " (")
        .replace('}', ") ")
        .replace('"', "'");

    BLOCKED_PATTERNS
        .iter()
        .fold(defanged, |acc, (needle, replacement)| {
            replace_ascii_ignore_case(&acc, needle, replacement)
        })
}
63
64async fn perform_search(query: &str, searx_url: Option<&str>) -> Result<String, String> {
65 let effective_url = searx_url.unwrap_or("http://localhost:8080");
67
68 match perform_searx_search(query, effective_url).await {
69 Ok(results) if !results.is_empty() => return Ok(results),
70 _ => {
71 }
74 }
75
76 let sleep_duration = {
78 if let Ok(last_call) = LAST_SEARCH_CALL.lock() {
79 last_call.and_then(|instant| {
80 let elapsed = instant.elapsed();
81 if elapsed < Duration::from_secs(3) {
82 Some(Duration::from_secs(3) - elapsed)
83 } else {
84 None
85 }
86 })
87 } else {
88 None
89 }
90 };
91 if let Some(duration) = sleep_duration {
92 tokio::time::sleep(duration).await;
93 }
94 if let Ok(mut last_call) = LAST_SEARCH_CALL.lock() {
95 *last_call = Some(Instant::now());
96 }
97
98 let encoded = percent_encoding::utf8_percent_encode(query, percent_encoding::NON_ALPHANUMERIC);
101 let search_url = format!("https://s.jina.ai/{}", encoded);
102
103 let client = reqwest::Client::builder()
104 .timeout(Duration::from_secs(20))
105 .build()
106 .map_err(|e| format!("Failed to build client: {e}"))?;
107
108 let mut request = client.get(&search_url)
109 .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
110
111 if let Ok(key) = std::env::var("JINA_API_KEY") {
113 request = request.header("Authorization", format!("Bearer {}", key));
114 }
115
116 let response = request
117 .send()
118 .await
119 .map_err(|e| format!("Failed to connect to search proxy: {e}"))?;
120
121 let markdown = response
122 .text()
123 .await
124 .map_err(|e| format!("Failed to read search response: {e}"))?;
125
126 if markdown.trim().starts_with("<!doctype html") || markdown.contains("<html") {
128 return Err("Search proxy returned raw HTML (possibly a rate limit or captcha). Falling back to internal reasoning.".into());
129 }
130
131 Ok(format!(
132 "[Source: Jina Search Proxy]\n\n{}",
133 sanitize_web_content(&markdown)
134 ))
135}
136
137async fn perform_searx_search(query: &str, base_url: &str) -> Result<String, String> {
138 let client = reqwest::Client::builder()
139 .timeout(Duration::from_secs(5))
140 .build()
141 .map_err(|e| format!("Failed to build SearXNG client: {e}"))?;
142
143 let base = base_url.trim_end_matches('/');
145 let search_url = format!(
146 "{}/search?q={}&format=json",
147 base,
148 urlencoding::encode(query)
149 );
150
151 let response = client
152 .get(&search_url)
153 .header(USER_AGENT, "Hematite-CLI/0.6.0")
154 .send()
155 .await
156 .map_err(|e| format!("SearXNG connection failed: {e}"))?;
157
158 if !response.status().is_success() {
159 return Err(format!("SearXNG returned error: {}", response.status()));
160 }
161
162 let json: Value = response
163 .json()
164 .await
165 .map_err(|e| format!("Failed to parse SearXNG JSON: {e}"))?;
166
167 let mut output = String::new();
168 output.push_str("[Source: SearXNG (Local/Auto-Detected)]\n\n");
169 output.push_str(&format!("# Search results for: {}\n\n", query));
170
171 if let Some(results) = json.get("results").and_then(|r| r.as_array()) {
172 for (i, res) in results.iter().take(10).enumerate() {
173 let title = res
174 .get("title")
175 .and_then(|v| v.as_str())
176 .unwrap_or("No Title");
177 let url = res.get("url").and_then(|v| v.as_str()).unwrap_or("#");
178 let content = res.get("content").and_then(|v| v.as_str()).unwrap_or("");
179
180 output.push_str(&format!(
181 "### {}. [{}]({})\n{}\n\n",
182 i + 1,
183 title,
184 url,
185 sanitize_web_content(content)
186 ));
187 }
188 }
189
190 if output.len() < 50 {
191 return Ok(String::new());
192 }
193
194 Ok(output)
195}
196
#[cfg(test)]
mod tests {
    use super::sanitize_web_content;

    /// Braces and quotes are defanged and `<iframe` is blocked, while an
    /// ordinary markdown link passes through unchanged.
    #[test]
    fn sanitize_web_content_blocks_script_patterns_without_breaking_markdown_links() {
        let raw = r#"Use {"tool":"shell"} and [Rust](https://www.rust-lang.org) <iframe src="x"></iframe>"#;
        let cleaned = sanitize_web_content(raw);

        let expected_fragments = [
            "('tool':'shell')",
            "[Rust](https://www.rust-lang.org)",
            "[BLOCKED IFRAME]",
        ];
        for fragment in expected_fragments.iter() {
            assert!(cleaned.contains(*fragment));
        }
    }
}
211
212pub async fn execute_fetch(args: &Value) -> Result<String, String> {
217 let url = args
218 .get("url")
219 .and_then(|v| v.as_str())
220 .ok_or_else(|| "Missing required argument: 'url'".to_string())?;
221
222 let proxy_url = format!("https://r.jina.ai/{}", url);
224
225 let client = reqwest::Client::builder()
226 .timeout(Duration::from_secs(25))
227 .build()
228 .map_err(|e| format!("Failed to build client: {e}"))?;
229
230 let mut request = client.get(&proxy_url)
231 .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
232
233 if let Ok(key) = std::env::var("JINA_API_KEY") {
235 request = request.header("Authorization", format!("Bearer {}", key));
236 }
237
238 let response = request
239 .send()
240 .await
241 .map_err(|e| format!("Failed to connect to documentation proxy: {e}"))?;
242
243 let markdown = response
244 .text()
245 .await
246 .map_err(|e| format!("Failed to read documentation body: {e}"))?;
247
248 Ok(markdown)
249}