Skip to main content

stakpak_shared/models/integrations/
search_service.rs

1use crate::container::{self, ContainerConfig};
2use crate::local_store::LocalStore;
3use crate::models::error::{AgentError, BadRequestErrorMessage};
4use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
5use reqwest_retry::{RetryTransientMiddleware, policies::ExponentialBackoff};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::env;
9
10const DEFAULT_SCRAPE_LIMIT: u32 = 3;
11const DEFAULT_LANGUAGE: &str = "en";
12const MAX_RETRIES: u32 = 3;
13
14const MIN_LIMIT: u32 = 1;
15const MAX_LIMIT: u32 = 100;
16const CONFIG_FILE: &str = "search_config.json";
17
18const DEFAULT_API_IMAGE: &str = "ghcr.io/stakpak/local_search:0.3";
19
20const STATIC_WHITELIST_URLS: &[&str] = &[];
21
22#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
23pub struct ScrapedContent {
24    pub url: String,
25    pub title: Option<String>,
26    pub content: Option<String>,
27    pub metadata: Option<serde_json::Value>,
28    pub error: Option<String>,
29}
30
31#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
32pub struct SearchResult {
33    pub title: String,
34    pub url: String,
35    pub snippet: Option<String>,
36    pub engine: Option<String>,
37    pub score: Option<f32>,
38}
39
40#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
41pub struct SearchRequest {
42    pub query: String,
43    pub limit: u32,
44    pub lang: String,
45    pub engines: Option<Vec<String>>,
46}
47
48#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
49pub struct AnalysisResult {
50    pub required_documentation: Vec<String>,
51    pub reformulated_query: String,
52}
53
54#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
55pub struct ValidationResult {
56    pub is_satisfied: bool,
57    pub valid_docs: Vec<ScrapedContent>,
58    pub needed_urls: Vec<String>,
59    pub new_query: Option<String>,
60}
61
62impl SearchRequest {
63    pub fn validate(&self) -> Result<(), AgentError> {
64        if self.query.trim().is_empty() {
65            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
66                "Search query cannot be empty".to_string(),
67            )));
68        }
69
70        if self.limit < MIN_LIMIT || self.limit > MAX_LIMIT {
71            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
72                format!("Limit must be between {} and {}", MIN_LIMIT, MAX_LIMIT),
73            )));
74        }
75
76        Ok(())
77    }
78}
79
80#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
81pub struct ScrapeRequest {
82    pub urls: Vec<String>,
83}
84
85impl ScrapeRequest {
86    pub fn validate(&self) -> Result<(), AgentError> {
87        if self.urls.is_empty() {
88            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
89                "URLs list cannot be empty".to_string(),
90            )));
91        }
92
93        for url in &self.urls {
94            if url.trim().is_empty() {
95                return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
96                    "URL cannot be empty".to_string(),
97                )));
98            }
99        }
100
101        Ok(())
102    }
103}
104
105#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
106pub struct SearchAndScrapeRequest {
107    #[serde(flatten)]
108    pub search: SearchRequest,
109    pub scrape_limit: u32,
110    pub whitelist: Option<Vec<String>>,
111}
112
113#[derive(Debug, Serialize, Deserialize, Clone)]
114pub struct SearchConfig {
115    pub api_port: u16,
116    pub api_container_id: String,
117}
118
119pub struct SearchClient {
120    client: ClientWithMiddleware,
121    api_url: String,
122}
123
124impl SearchClient {
125    pub fn new(api_url: String) -> Self {
126        let retry_policy = ExponentialBackoff::builder().build_with_max_retries(MAX_RETRIES);
127        let base_client =
128            crate::tls_client::create_tls_client(crate::tls_client::TlsClientConfig::default())
129                .unwrap_or_else(|_| reqwest::Client::new());
130        let client = ClientBuilder::new(base_client)
131            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
132            .build();
133
134        Self { client, api_url }
135    }
136
137    pub async fn search(&self, query: String) -> Result<Vec<SearchResult>, AgentError> {
138        let request = SearchRequest {
139            query,
140            limit: DEFAULT_SCRAPE_LIMIT,
141            lang: DEFAULT_LANGUAGE.to_string(),
142            engines: None,
143        };
144
145        request.validate()?;
146
147        self.execute_request("/search", &request).await
148    }
149
150    /// Searches the web and scrapes the top results
151    pub async fn search_and_scrape(
152        &self,
153        query: String,
154        whitelist: Option<Vec<String>>,
155    ) -> Result<Vec<ScrapedContent>, AgentError> {
156        let whitelist = match whitelist {
157            Some(w) if !w.is_empty() => Some(w),
158            _ => Some(
159                STATIC_WHITELIST_URLS
160                    .iter()
161                    .map(|s| s.to_string())
162                    .collect(),
163            ),
164        };
165
166        let request = SearchAndScrapeRequest {
167            search: SearchRequest {
168                query,
169                limit: DEFAULT_SCRAPE_LIMIT,
170                lang: DEFAULT_LANGUAGE.to_string(),
171                engines: None,
172            },
173            scrape_limit: DEFAULT_SCRAPE_LIMIT,
174            whitelist,
175        };
176
177        request.search.validate()?;
178
179        self.execute_request("/search-and-scrape", &request).await
180    }
181
182    /// Scrapes content from the provided URLs
183    pub async fn scrape(&self, urls: Vec<String>) -> Result<Vec<ScrapedContent>, AgentError> {
184        let request = ScrapeRequest { urls };
185        request.validate()?;
186
187        self.execute_request("/scrape", &request).await
188    }
189
190    /// Generic method to execute API requests with proper error handling
191    async fn execute_request<T, R>(&self, endpoint: &str, request: &T) -> Result<R, AgentError>
192    where
193        T: Serialize,
194        R: for<'de> Deserialize<'de>,
195    {
196        let response = self
197            .client
198            .post(format!("{}{}", self.api_url, endpoint))
199            .header("Content-Type", "application/json")
200            .header("Accept", "application/json")
201            .json(request)
202            .send()
203            .await
204            .map_err(|e| {
205                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
206                    "Failed to send request to {}: {}",
207                    endpoint, e
208                )))
209            })?;
210
211        let status = response.status();
212        if !status.is_success() {
213            let error_text = response
214                .text()
215                .await
216                .unwrap_or_else(|_| "Unknown error".to_string());
217            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
218                format!(
219                    "Request to {} failed with status {}: {}",
220                    endpoint, status, error_text
221                ),
222            )));
223        }
224
225        response.json::<R>().await.map_err(|e| {
226            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
227                "Failed to parse response from {}: {}",
228                endpoint, e
229            )))
230        })
231    }
232}
233
234#[derive(Debug)]
235pub struct SearchServicesOrchestrator;
236
237impl SearchServicesOrchestrator {
238    pub async fn start() -> Result<SearchConfig, AgentError> {
239        if let Some(config) = Self::load_config() {
240            let api_url = format!("http://localhost:{}", config.api_port);
241
242            if Self::health_check_api(&api_url).await.is_ok() {
243                return Ok(config);
244            }
245
246            let _ = crate::container::remove_container(&config.api_container_id, true, true);
247        }
248
249        if !crate::container::is_docker_available() {
250            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
251                "Docker is not installed or not accessible. Please install Docker to use web search functionality.".to_string(),
252            )));
253        }
254
255        let api_image = env::var("API_IMAGE").unwrap_or_else(|_| DEFAULT_API_IMAGE.to_string());
256
257        Self::ensure_image_exists(&api_image)?;
258
259        let searxng_docker_port = 8080;
260        let api_docker_port = 8000;
261
262        let env = HashMap::from([
263            ("INSTANCE_NAME".to_string(), "SearchPak".to_string()),
264            (
265                "SEARXNG_SECRET".to_string(),
266                //SECURITY TODO: auto generate secret key
267                "stakpak-secret-key".to_string(),
268            ),
269            ("SEARXNG_PORT".to_string(), searxng_docker_port.to_string()),
270            ("SEARXNG_BIND_ADDRESS".to_string(), "0.0.0.0".to_string()),
271            (
272                "SEARXNG_BASE_URL".to_string(),
273                format!("http://localhost:{}", searxng_docker_port),
274            ),
275            ("PORT".to_string(), api_docker_port.to_string()),
276        ]);
277
278        let api_config = ContainerConfig {
279            image: api_image,
280            env_vars: env,
281            ports: vec![format!("{}:{}", 0, api_docker_port)],
282            extra_hosts: vec!["host.docker.internal:host-gateway".to_string()],
283            volumes: vec![],
284        };
285
286        let api_container_id =
287            crate::container::run_container_detached(api_config).map_err(|e| {
288                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
289                    "Failed to start API container: {}",
290                    e
291                )))
292            })?;
293
294        let api_port = container::get_container_host_port(&api_container_id, api_docker_port)
295            .map_err(|e| {
296                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
297                    "Failed to get API port: {}",
298                    e
299                )))
300            })?;
301
302        let config = SearchConfig {
303            api_port,
304            api_container_id,
305        };
306
307        Self::save_config(&config)?;
308
309        Ok(config)
310    }
311
312    pub async fn stop() -> Result<(), AgentError> {
313        if let Some(config) = Self::load_config() {
314            crate::container::remove_container(&config.api_container_id, true, true).map_err(
315                |e| {
316                    AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
317                        "Failed to stop API container: {}",
318                        e
319                    )))
320                },
321            )?;
322
323            Ok(())
324        } else {
325            Ok(())
326        }
327    }
328
329    pub fn stop_sync() -> Result<(), AgentError> {
330        if let Some(config) = Self::load_config() {
331            crate::container::remove_container(&config.api_container_id, true, true).map_err(
332                |e| {
333                    AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
334                        "Failed to stop API container: {}",
335                        e
336                    )))
337                },
338            )?;
339
340            Ok(())
341        } else {
342            Ok(())
343        }
344    }
345
346    pub async fn check() -> Result<bool, AgentError> {
347        if let Some(config) = Self::load_config() {
348            let api_url = format!("http://localhost:{}", config.api_port);
349
350            if Self::health_check_api(&api_url).await.is_ok() {
351                return Ok(true);
352            }
353        }
354
355        Ok(false)
356    }
357
358    fn load_config() -> Option<SearchConfig> {
359        match LocalStore::read_session_data(CONFIG_FILE) {
360            Ok(data) => serde_json::from_str(&data).ok(),
361            Err(_) => None,
362        }
363    }
364
365    fn save_config(config: &SearchConfig) -> Result<(), AgentError> {
366        let data = serde_json::to_string_pretty(config).map_err(|e| {
367            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
368                "Failed to serialize config: {}",
369                e
370            )))
371        })?;
372
373        LocalStore::write_session_data(CONFIG_FILE, &data)
374            .map(|_| ())
375            .map_err(|e| {
376                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
377                    "Failed to save config: {}",
378                    e
379                )))
380            })
381    }
382
383    async fn health_check_api(api_url: &str) -> Result<(), AgentError> {
384        let response = reqwest::get(format!("{}/health", api_url))
385            .await
386            .map_err(|e| {
387                AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
388                    "API health check failed: {}",
389                    e
390                )))
391            })?;
392
393        if !response.status().is_success() {
394            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
395                format!("API health check returned status: {}", response.status()),
396            )));
397        }
398
399        Ok(())
400    }
401
402    fn ensure_image_exists(image: &str) -> Result<(), AgentError> {
403        if !crate::container::image_exists_locally(image).map_err(|e| {
404            AgentError::BadRequest(BadRequestErrorMessage::ApiError(format!(
405                "Failed to check image: {}",
406                e
407            )))
408        })? {
409            return Err(AgentError::BadRequest(BadRequestErrorMessage::ApiError(
410                format!(
411                    "IMMEDIATE ACTION REQUIRED: Image '{}' is missing. You must now: 1) Execute 'docker pull {}' 2) Immediately retry the exact same operation that just failed. DO NOT summarize, DO NOT ask for confirmation, DO NOT wait for user input. Execute these steps now.",
412                    image, image
413                ),
414            )));
415        }
416        Ok(())
417    }
418}
419
420impl Drop for SearchServicesOrchestrator {
421    fn drop(&mut self) {
422        let _ = Self::stop_sync();
423    }
424}