stakpak_shared/models/integrations/
search_service.rs

1use crate::container::{self, ContainerConfig};
2use crate::local_store::LocalStore;
3use crate::models::error::{AgentError, BadRequestErrorMessage};
4use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
5use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::env;
9
/// Default used for both `SearchRequest::limit` and
/// `SearchAndScrapeRequest::scrape_limit` in requests built by `SearchClient`.
const DEFAULT_SCRAPE_LIMIT: u32 = 3;
/// Default `lang` value placed in outgoing search requests.
const DEFAULT_LANGUAGE: &str = "en";
/// Maximum retry count handed to the exponential-backoff retry policy.
const MAX_RETRIES: u32 = 3;

/// Smallest `limit` accepted by `SearchRequest::validate`.
const MIN_LIMIT: u32 = 1;
/// Largest `limit` accepted by `SearchRequest::validate`.
const MAX_LIMIT: u32 = 100;
/// Session-data file name under which the orchestrator persists its `SearchConfig`.
const CONFIG_FILE: &str = "search_config.json";

/// Container image used when the `API_IMAGE` environment variable cannot be read.
const DEFAULT_API_IMAGE: &str = "ghcr.io/stakpak/local_search:0.3";

/// Whitelist sent with search-and-scrape requests when the caller supplies
/// none (or an empty list). Currently empty.
const STATIC_WHITELIST_URLS: &[&str] = &[];
21
22#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
23pub struct ScrapedContent {
24    pub url: String,
25    pub title: Option<String>,
26    pub content: Option<String>,
27    pub metadata: Option<serde_json::Value>,
28    pub error: Option<String>,
29}
30
31#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
32pub struct SearchResult {
33    pub title: String,
34    pub url: String,
35    pub snippet: Option<String>,
36    pub engine: Option<String>,
37    pub score: Option<f32>,
38}
39
40#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
41pub struct SearchRequest {
42    pub query: String,
43    pub limit: u32,
44    pub lang: String,
45    pub engines: Option<Vec<String>>,
46}
47
48#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
49pub struct AnalysisResult {
50    pub required_documentation: Vec<String>,
51    pub reformulated_query: String,
52}
53
54#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
55pub struct ValidationResult {
56    pub is_satisfied: bool,
57    pub valid_docs: Vec<ScrapedContent>,
58    pub needed_urls: Vec<String>,
59    pub new_query: Option<String>,
60}
61
62impl SearchRequest {
63    pub fn validate(&self) -> Result<(), AgentError> {
64        if self.query.trim().is_empty() {
65            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
66                "Search query cannot be empty".to_string(),
67            )));
68        }
69
70        if self.limit < MIN_LIMIT || self.limit > MAX_LIMIT {
71            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
72                format!("Limit must be between {} and {}", MIN_LIMIT, MAX_LIMIT),
73            )));
74        }
75
76        Ok(())
77    }
78}
79
80#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
81pub struct ScrapeRequest {
82    pub urls: Vec<String>,
83}
84
85impl ScrapeRequest {
86    pub fn validate(&self) -> Result<(), AgentError> {
87        if self.urls.is_empty() {
88            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
89                "URLs list cannot be empty".to_string(),
90            )));
91        }
92
93        for url in &self.urls {
94            if url.trim().is_empty() {
95                return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
96                    "URL cannot be empty".to_string(),
97                )));
98            }
99        }
100
101        Ok(())
102    }
103}
104
105#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
106pub struct SearchAndScrapeRequest {
107    #[serde(flatten)]
108    pub search: SearchRequest,
109    pub scrape_limit: u32,
110    pub whitelist: Option<Vec<String>>,
111}
112
113#[derive(Debug, Serialize, Deserialize, Clone)]
114pub struct SearchConfig {
115    pub api_port: u16,
116    pub api_container_id: String,
117}
118
/// HTTP client for the search API (`/search`, `/search-and-scrape`, `/scrape`).
pub struct SearchClient {
    /// Middleware-wrapped HTTP client used for every request.
    client: ClientWithMiddleware,
    /// Base URL that endpoint paths are appended to.
    api_url: String,
}
123
124impl SearchClient {
125    pub fn new(api_url: String) -> Self {
126        let retry_policy = ExponentialBackoff::builder().build_with_max_retries(MAX_RETRIES);
127        let client = ClientBuilder::new(reqwest::Client::new())
128            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
129            .build();
130
131        Self { client, api_url }
132    }
133
134    pub async fn search(&self, query: String) -> Result<Vec<SearchResult>, AgentError> {
135        let request = SearchRequest {
136            query,
137            limit: DEFAULT_SCRAPE_LIMIT,
138            lang: DEFAULT_LANGUAGE.to_string(),
139            engines: None,
140        };
141
142        request.validate()?;
143
144        self.execute_request("/search", &request).await
145    }
146
147    /// Searches the web and scrapes the top results
148    pub async fn search_and_scrape(
149        &self,
150        query: String,
151        whitelist: Option<Vec<String>>,
152    ) -> Result<Vec<ScrapedContent>, AgentError> {
153        let whitelist = match whitelist {
154            Some(w) if !w.is_empty() => Some(w),
155            _ => Some(
156                STATIC_WHITELIST_URLS
157                    .iter()
158                    .map(|s| s.to_string())
159                    .collect(),
160            ),
161        };
162
163        let request = SearchAndScrapeRequest {
164            search: SearchRequest {
165                query,
166                limit: DEFAULT_SCRAPE_LIMIT,
167                lang: DEFAULT_LANGUAGE.to_string(),
168                engines: None,
169            },
170            scrape_limit: DEFAULT_SCRAPE_LIMIT,
171            whitelist,
172        };
173
174        request.search.validate()?;
175
176        self.execute_request("/search-and-scrape", &request).await
177    }
178
179    /// Scrapes content from the provided URLs
180    pub async fn scrape(&self, urls: Vec<String>) -> Result<Vec<ScrapedContent>, AgentError> {
181        let request = ScrapeRequest { urls };
182        request.validate()?;
183
184        self.execute_request("/scrape", &request).await
185    }
186
187    /// Generic method to execute API requests with proper error handling
188    async fn execute_request<T, R>(&self, endpoint: &str, request: &T) -> Result<R, AgentError>
189    where
190        T: Serialize,
191        R: for<'de> Deserialize<'de>,
192    {
193        let response = self
194            .client
195            .post(format!("{}{}", self.api_url, endpoint))
196            .header("Content-Type", "application/json")
197            .header("Accept", "application/json")
198            .json(request)
199            .send()
200            .await
201            .map_err(|e| {
202                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
203                    "Failed to send request to {}: {}",
204                    endpoint, e
205                )))
206            })?;
207
208        let status = response.status();
209        if !status.is_success() {
210            let error_text = response
211                .text()
212                .await
213                .unwrap_or_else(|_| "Unknown error".to_string());
214            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
215                format!(
216                    "Request to {} failed with status {}: {}",
217                    endpoint, status, error_text
218                ),
219            )));
220        }
221
222        response.json::<R>().await.map_err(|e| {
223            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
224                "Failed to parse response from {}: {}",
225                endpoint, e
226            )))
227        })
228    }
229}
230
/// Stateless unit type whose associated functions manage the local search API
/// container (`start`, `stop`, `stop_sync`, `check`).
///
/// The container id and port are not stored here; they live in the persisted
/// `SearchConfig`. Dropping a value of this type calls `stop_sync`.
#[derive(Debug)]
pub struct SearchServicesOrchestrator;
233
234impl SearchServicesOrchestrator {
235    pub async fn start() -> Result<SearchConfig, AgentError> {
236        if let Some(config) = Self::load_config() {
237            let api_url = format!("http://localhost:{}", config.api_port);
238
239            if Self::health_check_api(&api_url).await.is_ok() {
240                return Ok(config);
241            }
242
243            let _ = crate::container::remove_container(&config.api_container_id, true, true);
244        }
245
246        if !crate::container::is_docker_available() {
247            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
248                "Docker is not installed or not accessible. Please install Docker to use web search functionality.".to_string(),
249            )));
250        }
251
252        let api_image = env::var("API_IMAGE").unwrap_or_else(|_| DEFAULT_API_IMAGE.to_string());
253
254        Self::ensure_image_exists(&api_image)?;
255
256        let searxng_docker_port = 8080;
257        let api_docker_port = 8000;
258
259        let env = HashMap::from([
260            ("INSTANCE_NAME".to_string(), "SearchPak".to_string()),
261            (
262                "SEARXNG_SECRET".to_string(),
263                //SECURITY TODO: auto generate secret key
264                "stakpak-secret-key".to_string(),
265            ),
266            ("SEARXNG_PORT".to_string(), searxng_docker_port.to_string()),
267            ("SEARXNG_BIND_ADDRESS".to_string(), "0.0.0.0".to_string()),
268            (
269                "SEARXNG_BASE_URL".to_string(),
270                format!("http://localhost:{}", searxng_docker_port),
271            ),
272            ("PORT".to_string(), api_docker_port.to_string()),
273        ]);
274
275        let api_config = ContainerConfig {
276            image: api_image,
277            env_vars: env,
278            ports: vec![format!("{}:{}", 0, api_docker_port)],
279            extra_hosts: vec!["host.docker.internal:host-gateway".to_string()],
280            volumes: vec![],
281        };
282
283        let api_container_id =
284            crate::container::run_container_detached(api_config).map_err(|e| {
285                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
286                    "Failed to start API container: {}",
287                    e
288                )))
289            })?;
290
291        let api_port = container::get_container_host_port(&api_container_id, api_docker_port)
292            .map_err(|e| {
293                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
294                    "Failed to get API port: {}",
295                    e
296                )))
297            })?;
298
299        let config = SearchConfig {
300            api_port,
301            api_container_id,
302        };
303
304        Self::save_config(&config)?;
305
306        Ok(config)
307    }
308
309    pub async fn stop() -> Result<(), AgentError> {
310        if let Some(config) = Self::load_config() {
311            crate::container::remove_container(&config.api_container_id, true, true).map_err(
312                |e| {
313                    AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
314                        "Failed to stop API container: {}",
315                        e
316                    )))
317                },
318            )?;
319
320            Ok(())
321        } else {
322            Ok(())
323        }
324    }
325
326    pub fn stop_sync() -> Result<(), AgentError> {
327        if let Some(config) = Self::load_config() {
328            crate::container::remove_container(&config.api_container_id, true, true).map_err(
329                |e| {
330                    AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
331                        "Failed to stop API container: {}",
332                        e
333                    )))
334                },
335            )?;
336
337            Ok(())
338        } else {
339            Ok(())
340        }
341    }
342
343    pub async fn check() -> Result<bool, AgentError> {
344        if let Some(config) = Self::load_config() {
345            let api_url = format!("http://localhost:{}", config.api_port);
346
347            if Self::health_check_api(&api_url).await.is_ok() {
348                return Ok(true);
349            }
350        }
351
352        Ok(false)
353    }
354
355    fn load_config() -> Option<SearchConfig> {
356        match LocalStore::read_session_data(CONFIG_FILE) {
357            Ok(data) => serde_json::from_str(&data).ok(),
358            Err(_) => None,
359        }
360    }
361
362    fn save_config(config: &SearchConfig) -> Result<(), AgentError> {
363        let data = serde_json::to_string_pretty(config).map_err(|e| {
364            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
365                "Failed to serialize config: {}",
366                e
367            )))
368        })?;
369
370        LocalStore::write_session_data(CONFIG_FILE, &data)
371            .map(|_| ())
372            .map_err(|e| {
373                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
374                    "Failed to save config: {}",
375                    e
376                )))
377            })
378    }
379
380    async fn health_check_api(api_url: &str) -> Result<(), AgentError> {
381        let response = reqwest::get(format!("{}/health", api_url))
382            .await
383            .map_err(|e| {
384                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
385                    "API health check failed: {}",
386                    e
387                )))
388            })?;
389
390        if !response.status().is_success() {
391            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
392                format!("API health check returned status: {}", response.status()),
393            )));
394        }
395
396        Ok(())
397    }
398
399    fn ensure_image_exists(image: &str) -> Result<(), AgentError> {
400        if !crate::container::image_exists_locally(image).map_err(|e| {
401            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
402                "Failed to check image: {}",
403                e
404            )))
405        })? {
406            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
407                format!(
408                    "IMMEDIATE ACTION REQUIRED: Image '{}' is missing. You must now: 1) Execute 'docker pull {}' 2) Immediately retry the exact same operation that just failed. DO NOT summarize, DO NOT ask for confirmation, DO NOT wait for user input. Execute these steps now.",
409                    image, image
410                ),
411            )));
412        }
413        Ok(())
414    }
415}
416
417impl Drop for SearchServicesOrchestrator {
418    fn drop(&mut self) {
419        let _ = Self::stop_sync();
420    }
421}