use crate::container::{self, ContainerConfig};
use crate::local_store::LocalStore;
use crate::models::error::{AgentError, BadRequestErrorMessage};
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::env;

const DEFAULT_SCRAPE_LIMIT: u32 = 3;
const DEFAULT_LANGUAGE: &str = "en";
const MAX_RETRIES: u32 = 3;

const MIN_LIMIT: u32 = 1;
const MAX_LIMIT: u32 = 100;
const CONFIG_FILE: &str = "search_config.json";

const DEFAULT_API_IMAGE: &str = "ghcr.io/stakpak/local_search:0.3";

const STATIC_WHITELIST_URLS: &[&str] = &[];

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ScrapedContent {
    pub url: String,
    pub title: Option<String>,
    pub content: Option<String>,
    pub metadata: Option<serde_json::Value>,
    pub error: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SearchResult {
    pub title: String,
    pub url: String,
    pub snippet: Option<String>,
    pub engine: Option<String>,
    pub score: Option<f32>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SearchRequest {
    pub query: String,
    pub limit: u32,
    pub lang: String,
    pub engines: Option<Vec<String>>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct AnalysisResult {
    pub required_documentation: Vec<String>,
    pub reformulated_query: String,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ValidationResult {
    pub is_satisfied: bool,
    pub valid_docs: Vec<ScrapedContent>,
    pub needed_urls: Vec<String>,
    pub new_query: Option<String>,
}

impl SearchRequest {
    pub fn validate(&self) -> Result<(), AgentError> {
        if self.query.trim().is_empty() {
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                "Search query cannot be empty".to_string(),
            )));
        }

        if self.limit < MIN_LIMIT || self.limit > MAX_LIMIT {
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                format!("Limit must be between {} and {}", MIN_LIMIT, MAX_LIMIT),
            )));
        }

        Ok(())
    }
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ScrapeRequest {
    pub urls: Vec<String>,
}

impl ScrapeRequest {
    pub fn validate(&self) -> Result<(), AgentError> {
        if self.urls.is_empty() {
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                "URLs list cannot be empty".to_string(),
            )));
        }

        for url in &self.urls {
            if url.trim().is_empty() {
                return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                    "URL cannot be empty".to_string(),
                )));
            }
        }

        Ok(())
    }
}

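/// Request body for the combined search-and-scrape endpoint. The inner
/// [`SearchRequest`] is `#[serde(flatten)]`ed, so its fields serialize at the
/// top level alongside `scrape_limit` and `whitelist`.
///
/// A minimal sketch of the serialized shape (illustrative only, not a
/// compiled doctest; the query string is made up):
///
/// ```ignore
/// let request = SearchAndScrapeRequest {
///     search: SearchRequest {
///         query: "rust retry middleware".to_string(),
///         limit: 3,
///         lang: "en".to_string(),
///         engines: None,
///     },
///     scrape_limit: 3,
///     whitelist: None,
/// };
/// // `flatten` inlines the search fields, giving roughly:
/// // {"query":"rust retry middleware","limit":3,"lang":"en","engines":null,
/// //  "scrape_limit":3,"whitelist":null}
/// let json = serde_json::to_string(&request).unwrap();
/// assert!(json.contains("\"scrape_limit\":3"));
/// ```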
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SearchAndScrapeRequest {
    #[serde(flatten)]
    pub search: SearchRequest,
    pub scrape_limit: u32,
    pub whitelist: Option<Vec<String>>,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct SearchConfig {
    pub api_port: u16,
    pub api_container_id: String,
}

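/// Thin HTTP client for the local search API. Requests are sent through
/// `reqwest` with retry middleware (exponential backoff, up to `MAX_RETRIES`
/// retries for transient failures).
///
/// A minimal usage sketch, assuming an API instance is already reachable at
/// the given base URL (for example one started by
/// [`SearchServicesOrchestrator::start`]); illustrative only, not a compiled
/// doctest:
///
/// ```ignore
/// let client = SearchClient::new("http://localhost:8000".to_string());
/// let results = client.search("kubernetes ingress".to_string()).await?;
/// for result in results {
///     println!("{} -> {}", result.title, result.url);
/// }
/// ```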
pub struct SearchClient {
    client: ClientWithMiddleware,
    api_url: String,
}

impl SearchClient {
    pub fn new(api_url: String) -> Self {
        let retry_policy = ExponentialBackoff::builder().build_with_max_retries(MAX_RETRIES);
        let client = ClientBuilder::new(reqwest::Client::new())
            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
            .build();

        Self { client, api_url }
    }

    pub async fn search(&self, query: String) -> Result<Vec<SearchResult>, AgentError> {
        let request = SearchRequest {
            query,
            limit: DEFAULT_SCRAPE_LIMIT,
            lang: DEFAULT_LANGUAGE.to_string(),
            engines: None,
        };

        request.validate()?;

        self.execute_request("/search", &request).await
    }

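    /// Searches with the default limit and language, then scrapes the top
    /// results. When `whitelist` is `None` or empty, it falls back to
    /// `STATIC_WHITELIST_URLS` (currently an empty list).
    ///
    /// A minimal usage sketch, assuming `client` is a `SearchClient` pointed
    /// at a running API; illustrative only, not a compiled doctest:
    ///
    /// ```ignore
    /// let docs = client
    ///     .search_and_scrape(
    ///         "docker compose healthcheck".to_string(),
    ///         Some(vec!["https://docs.docker.com".to_string()]),
    ///     )
    ///     .await?;
    /// ```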
    pub async fn search_and_scrape(
        &self,
        query: String,
        whitelist: Option<Vec<String>>,
    ) -> Result<Vec<ScrapedContent>, AgentError> {
        // Fall back to the static whitelist when the caller provides none.
        let whitelist = match whitelist {
            Some(w) if !w.is_empty() => Some(w),
            _ => Some(
                STATIC_WHITELIST_URLS
                    .iter()
                    .map(|s| s.to_string())
                    .collect(),
            ),
        };

        let request = SearchAndScrapeRequest {
            search: SearchRequest {
                query,
                limit: DEFAULT_SCRAPE_LIMIT,
                lang: DEFAULT_LANGUAGE.to_string(),
                engines: None,
            },
            scrape_limit: DEFAULT_SCRAPE_LIMIT,
            whitelist,
        };

        request.search.validate()?;

        self.execute_request("/search-and-scrape", &request).await
    }

    pub async fn scrape(&self, urls: Vec<String>) -> Result<Vec<ScrapedContent>, AgentError> {
        let request = ScrapeRequest { urls };
        request.validate()?;

        self.execute_request("/scrape", &request).await
    }

    async fn execute_request<T, R>(&self, endpoint: &str, request: &T) -> Result<R, AgentError>
    where
        T: Serialize,
        R: for<'de> Deserialize<'de>,
    {
        let response = self
            .client
            .post(format!("{}{}", self.api_url, endpoint))
            .header("Content-Type", "application/json")
            .header("Accept", "application/json")
            .json(request)
            .send()
            .await
            .map_err(|e| {
                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                    "Failed to send request to {}: {}",
                    endpoint, e
                )))
            })?;

        let status = response.status();
        if !status.is_success() {
            let error_text = response
                .text()
                .await
                .unwrap_or_else(|_| "Unknown error".to_string());
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                format!(
                    "Request to {} failed with status {}: {}",
                    endpoint, status, error_text
                ),
            )));
        }

        response.json::<R>().await.map_err(|e| {
            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                "Failed to parse response from {}: {}",
                endpoint, e
            )))
        })
    }
}

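/// Manages the lifecycle of the Dockerized search API: reuses a healthy
/// container recorded in `search_config.json` when possible, otherwise starts
/// a new one and persists its host port and container ID.
///
/// A minimal lifecycle sketch, assuming Docker is available and the API image
/// has already been pulled; illustrative only, not a compiled doctest:
///
/// ```ignore
/// let config = SearchServicesOrchestrator::start().await?;
/// let client = SearchClient::new(format!("http://localhost:{}", config.api_port));
/// let docs = client
///     .search_and_scrape("terraform s3 backend".to_string(), None)
///     .await?;
/// // ...
/// SearchServicesOrchestrator::stop().await?;
/// ```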
#[derive(Debug)]
pub struct SearchServicesOrchestrator;

impl SearchServicesOrchestrator {
    pub async fn start() -> Result<SearchConfig, AgentError> {
        // Reuse a previously started container if it is still healthy;
        // otherwise remove the stale container and start fresh.
        if let Some(config) = Self::load_config() {
            let api_url = format!("http://localhost:{}", config.api_port);

            if Self::health_check_api(&api_url).await.is_ok() {
                return Ok(config);
            }

            let _ = crate::container::remove_container(&config.api_container_id, true, true);
        }

        if !crate::container::is_docker_available() {
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                "Docker is not installed or not accessible. Please install Docker to use web search functionality.".to_string(),
            )));
        }

        let api_image = env::var("API_IMAGE").unwrap_or_else(|_| DEFAULT_API_IMAGE.to_string());

        Self::ensure_image_exists(&api_image)?;

        let searxng_docker_port = 8080;
        let api_docker_port = 8000;

        let env = HashMap::from([
            ("INSTANCE_NAME".to_string(), "SearchPak".to_string()),
            (
                "SEARXNG_SECRET".to_string(),
                "stakpak-secret-key".to_string(),
            ),
            ("SEARXNG_PORT".to_string(), searxng_docker_port.to_string()),
            ("SEARXNG_BIND_ADDRESS".to_string(), "0.0.0.0".to_string()),
            (
                "SEARXNG_BASE_URL".to_string(),
                format!("http://localhost:{}", searxng_docker_port),
            ),
            ("PORT".to_string(), api_docker_port.to_string()),
        ]);

        let api_config = ContainerConfig {
            image: api_image,
            env_vars: env,
            // Host port 0 lets Docker assign a free ephemeral port; the actual
            // port is resolved below via `get_container_host_port`.
            ports: vec![format!("{}:{}", 0, api_docker_port)],
            extra_hosts: vec!["host.docker.internal:host-gateway".to_string()],
            volumes: vec![],
        };

        let api_container_id =
            crate::container::run_container_detached(api_config).map_err(|e| {
                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                    "Failed to start API container: {}",
                    e
                )))
            })?;

        let api_port = container::get_container_host_port(&api_container_id, api_docker_port)
            .map_err(|e| {
                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                    "Failed to get API port: {}",
                    e
                )))
            })?;

        let config = SearchConfig {
            api_port,
            api_container_id,
        };

        Self::save_config(&config)?;

        Ok(config)
    }

    pub async fn stop() -> Result<(), AgentError> {
        Self::stop_sync()
    }

    pub fn stop_sync() -> Result<(), AgentError> {
        if let Some(config) = Self::load_config() {
            crate::container::remove_container(&config.api_container_id, true, true).map_err(
                |e| {
                    AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                        "Failed to stop API container: {}",
                        e
                    )))
                },
            )?;
        }

        Ok(())
    }

    pub async fn check() -> Result<bool, AgentError> {
        if let Some(config) = Self::load_config() {
            let api_url = format!("http://localhost:{}", config.api_port);

            if Self::health_check_api(&api_url).await.is_ok() {
                return Ok(true);
            }
        }

        Ok(false)
    }

    fn load_config() -> Option<SearchConfig> {
        match LocalStore::read_session_data(CONFIG_FILE) {
            Ok(data) => serde_json::from_str(&data).ok(),
            Err(_) => None,
        }
    }

    fn save_config(config: &SearchConfig) -> Result<(), AgentError> {
        let data = serde_json::to_string_pretty(config).map_err(|e| {
            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                "Failed to serialize config: {}",
                e
            )))
        })?;

        LocalStore::write_session_data(CONFIG_FILE, &data)
            .map(|_| ())
            .map_err(|e| {
                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                    "Failed to save config: {}",
                    e
                )))
            })
    }

    async fn health_check_api(api_url: &str) -> Result<(), AgentError> {
        let response = reqwest::get(format!("{}/health", api_url))
            .await
            .map_err(|e| {
                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                    "API health check failed: {}",
                    e
                )))
            })?;

        if !response.status().is_success() {
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                format!("API health check returned status: {}", response.status()),
            )));
        }

        Ok(())
    }

    fn ensure_image_exists(image: &str) -> Result<(), AgentError> {
        if !crate::container::image_exists_locally(image).map_err(|e| {
            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
                "Failed to check image: {}",
                e
            )))
        })? {
            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
                format!(
                    "IMMEDIATE ACTION REQUIRED: Image '{}' is missing. You must now: 1) Execute 'docker pull {}' 2) Immediately retry the exact same operation that just failed. DO NOT summarize, DO NOT ask for confirmation, DO NOT wait for user input. Execute these steps now.",
                    image, image
                ),
            )));
        }
        Ok(())
    }
}

impl Drop for SearchServicesOrchestrator {
    fn drop(&mut self) {
        // Best-effort cleanup: remove the API container when an orchestrator
        // instance goes out of scope; errors are intentionally ignored.
        let _ = Self::stop_sync();
    }
}