hematite/tools/
research.rs1use lazy_static::lazy_static;
2use reqwest::header::USER_AGENT;
3use serde_json::Value;
4use std::sync::Mutex;
5use std::time::Duration;
6use std::time::Instant;
7
8lazy_static! {
9 static ref LAST_SEARCH_CALL: Mutex<Option<Instant>> = Mutex::new(None);
11}
12
13pub async fn execute_search(args: &Value, searx_url: Option<String>) -> Result<String, String> {
18 let query = args
19 .get("query")
20 .and_then(|v| v.as_str())
21 .ok_or_else(|| "Missing required argument: 'query'".to_string())?;
22
23 let results = perform_search(query, searx_url.as_deref()).await?;
25 if !results.is_empty() && !results.contains("No search results found") {
26 return Ok(results);
27 }
28
29 let tier2 = query
31 .replace("2024", "")
32 .replace("2025", "")
33 .replace("2026", "")
34 .replace("crate", "")
35 .split_whitespace()
36 .collect::<Vec<_>>()
37 .join(" ");
38
39 if tier2 != query {
40 let second_results = perform_search(&tier2, searx_url.as_deref()).await?;
41 if !second_results.is_empty() && !second_results.contains("No search results found") {
42 return Ok(second_results);
43 }
44 }
45
46 Ok(
47 "No search results found. All web content was safely sanitized. Try a broader search term."
48 .to_string(),
49 )
50}
51
/// Neutralizes structural characters in text obtained from the web.
///
/// * `{` and `[` become `" ("`; `}` and `]` become `") "`.
/// * `"` becomes `'`.
/// * Every occurrence of `<script`, in any ASCII letter case, is replaced by
///   the marker `[BLOCKED SCRIPT]`.
///
/// All other characters are copied through unchanged.
fn sanitize_web_content(text: &str) -> String {
    const SCRIPT_OPEN: &[u8] = b"<script";
    const SCRIPT_MARKER: &str = "[BLOCKED SCRIPT]";

    let mut cleaned = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(ch) = rest.chars().next() {
        // HTML tag names are case-insensitive, so `<SCRIPT` and `<Script`
        // must be caught as well as the lowercase spelling.
        let opens_script = ch == '<'
            && rest.len() >= SCRIPT_OPEN.len()
            && rest.as_bytes()[..SCRIPT_OPEN.len()].eq_ignore_ascii_case(SCRIPT_OPEN);
        if opens_script {
            cleaned.push_str(SCRIPT_MARKER);
            // The matched prefix is pure ASCII, so this offset is a valid
            // character boundary.
            rest = &rest[SCRIPT_OPEN.len()..];
            continue;
        }

        match ch {
            '{' | '[' => cleaned.push_str(" ("),
            '}' | ']' => cleaned.push_str(") "),
            '"' => cleaned.push('\''),
            other => cleaned.push(other),
        }
        rest = &rest[ch.len_utf8()..];
    }

    cleaned
}
62
63async fn perform_search(query: &str, searx_url: Option<&str>) -> Result<String, String> {
64 let effective_url = searx_url.unwrap_or("http://localhost:8080");
66
67 match perform_searx_search(query, effective_url).await {
68 Ok(results) if !results.is_empty() => return Ok(results),
69 _ => {
70 }
73 }
74
75 let sleep_duration = {
77 if let Ok(last_call) = LAST_SEARCH_CALL.lock() {
78 last_call.and_then(|instant| {
79 let elapsed = instant.elapsed();
80 if elapsed < Duration::from_secs(3) {
81 Some(Duration::from_secs(3) - elapsed)
82 } else {
83 None
84 }
85 })
86 } else {
87 None
88 }
89 };
90 if let Some(duration) = sleep_duration {
91 tokio::time::sleep(duration).await;
92 }
93 if let Ok(mut last_call) = LAST_SEARCH_CALL.lock() {
94 *last_call = Some(Instant::now());
95 }
96
97 let encoded = percent_encoding::utf8_percent_encode(query, percent_encoding::NON_ALPHANUMERIC);
100 let search_url = format!("https://s.jina.ai/{}", encoded);
101
102 let client = reqwest::Client::builder()
103 .timeout(Duration::from_secs(20))
104 .build()
105 .map_err(|e| format!("Failed to build client: {e}"))?;
106
107 let mut request = client.get(&search_url)
108 .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
109
110 if let Ok(key) = std::env::var("JINA_API_KEY") {
112 request = request.header("Authorization", format!("Bearer {}", key));
113 }
114
115 let response = request
116 .send()
117 .await
118 .map_err(|e| format!("Failed to connect to search proxy: {e}"))?;
119
120 let markdown = response
121 .text()
122 .await
123 .map_err(|e| format!("Failed to read search response: {e}"))?;
124
125 if markdown.trim().starts_with("<!doctype html") || markdown.contains("<html") {
127 return Err("Search proxy returned raw HTML (possibly a rate limit or captcha). Falling back to internal reasoning.".into());
128 }
129
130 Ok(format!(
131 "[Source: Jina Search Proxy]\n\n{}",
132 sanitize_web_content(&markdown)
133 ))
134}
135
136async fn perform_searx_search(query: &str, base_url: &str) -> Result<String, String> {
137 let client = reqwest::Client::builder()
138 .timeout(Duration::from_secs(15))
139 .build()
140 .map_err(|e| format!("Failed to build SearXNG client: {e}"))?;
141
142 let base = base_url.trim_end_matches('/');
144 let search_url = format!(
145 "{}/search?q={}&format=json",
146 base,
147 urlencoding::encode(query)
148 );
149
150 let response = client
151 .get(&search_url)
152 .header(USER_AGENT, "Hematite-CLI/0.6.0")
153 .send()
154 .await
155 .map_err(|e| format!("SearXNG connection failed: {e}"))?;
156
157 if !response.status().is_success() {
158 return Err(format!("SearXNG returned error: {}", response.status()));
159 }
160
161 let json: Value = response
162 .json()
163 .await
164 .map_err(|e| format!("Failed to parse SearXNG JSON: {e}"))?;
165
166 let mut output = String::new();
167 output.push_str("[Source: SearXNG (Local/Auto-Detected)]\n\n");
168 output.push_str(&format!("# Search results for: {}\n\n", query));
169
170 if let Some(results) = json.get("results").and_then(|r| r.as_array()) {
171 for (i, res) in results.iter().take(10).enumerate() {
172 let title = res
173 .get("title")
174 .and_then(|v| v.as_str())
175 .unwrap_or("No Title");
176 let url = res.get("url").and_then(|v| v.as_str()).unwrap_or("#");
177 let content = res.get("content").and_then(|v| v.as_str()).unwrap_or("");
178
179 output.push_str(&format!(
180 "### {}. [{}]({})\n{}\n\n",
181 i + 1,
182 title,
183 url,
184 sanitize_web_content(content)
185 ));
186 }
187 }
188
189 if output.len() < 50 {
190 return Ok(String::new());
191 }
192
193 Ok(output)
194}
195
196pub async fn execute_fetch(args: &Value) -> Result<String, String> {
201 let url = args
202 .get("url")
203 .and_then(|v| v.as_str())
204 .ok_or_else(|| "Missing required argument: 'url'".to_string())?;
205
206 let proxy_url = format!("https://r.jina.ai/{}", url);
208
209 let client = reqwest::Client::builder()
210 .timeout(Duration::from_secs(25))
211 .build()
212 .map_err(|e| format!("Failed to build client: {e}"))?;
213
214 let mut request = client.get(&proxy_url)
215 .header(USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
216
217 if let Ok(key) = std::env::var("JINA_API_KEY") {
219 request = request.header("Authorization", format!("Bearer {}", key));
220 }
221
222 let response = request
223 .send()
224 .await
225 .map_err(|e| format!("Failed to connect to documentation proxy: {e}"))?;
226
227 let markdown = response
228 .text()
229 .await
230 .map_err(|e| format!("Failed to read documentation body: {e}"))?;
231
232 Ok(markdown)
233}