1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
5use super::{SearchResult, SearchResults};
6
7#[derive(Debug, Serialize, Deserialize)]
8pub struct JinaSearchResult {
9 pub title: String,
10 pub url: String,
11 pub description: String,
12 #[serde(default)]
13 pub content: String,
14}
15
16#[derive(Debug, Serialize, Deserialize)]
17pub struct JinaDirectResponse {
18 pub data: Vec<JinaDirectResult>,
19}
20
21#[derive(Debug, Serialize, Deserialize)]
22pub struct JinaDirectResult {
23 pub title: String,
24 pub url: String,
25 #[serde(default)]
26 pub description: String,
27 #[serde(default)]
28 pub content: String,
29}
30
/// Search provider that queries a Jina endpoint over HTTP.
pub struct JinaProvider {
    /// Endpoint that receives the GET request.
    pub url: String,
    /// Headers attached to every request. `search` also inspects the
    /// `Authorization`, `Accept` and `X-Engine` entries to decide how to send
    /// the request and how to parse the response.
    pub headers: HashMap<String, String>,
}
35
36impl JinaProvider {
37 pub fn new(url: String, headers: HashMap<String, String>) -> Self {
38 Self { url, headers }
39 }
40
41 pub async fn search(&self, query: &str, count: Option<usize>) -> Result<SearchResults> {
42 let client = reqwest::Client::new();
43
44 let params = vec![("q", query.to_string())];
46
47 crate::debug_log!(
48 "Jina: Making GET request to {} with params: {:?}",
49 self.url,
50 params
51 );
52
53 let mut request = client.get(&self.url).query(¶ms);
54
55 let use_full_content = self.headers.contains_key("X-Engine")
57 && self
58 .headers
59 .get("X-Engine")
60 .map_or(false, |v| v == "direct");
61
62 for (name, value) in &self.headers {
64 if name == "Authorization" {
65 request = request.header(name, format!("Bearer {}", value));
67 crate::debug_log!("Jina: Added Authorization header with Bearer token");
68 } else {
69 request = request.header(name, value);
70 crate::debug_log!("Jina: Added header {}: {}", name, value);
71 }
72 }
73
74 let want_json = self.headers.contains_key("Accept")
76 && self
77 .headers
78 .get("Accept")
79 .map_or(false, |v| v.contains("application/json"));
80
81 if use_full_content {
82 crate::debug_log!("Jina: Using X-Engine: direct for full content reading");
83 }
84
85 if want_json {
86 crate::debug_log!("Jina: Requesting JSON format");
87 } else {
88 crate::debug_log!("Jina: Requesting default text format");
89 }
90
91 let response = request.send().await?;
92
93 let status = response.status();
94 crate::debug_log!("Jina: Received response with status: {}", status);
95
96 if !status.is_success() {
97 let error_text = response.text().await.unwrap_or_default();
98 crate::debug_log!("Jina: Error response: {}", error_text);
99 anyhow::bail!("Jina request failed with status {}: {}", status, error_text);
100 }
101
102 let response_text = response.text().await?;
103 crate::debug_log!("Jina: Response body length: {} bytes", response_text.len());
104
105 let mut results = Vec::new();
106 let max_results = count.unwrap_or(10);
107
108 if want_json {
109 crate::debug_log!("Jina: Parsing JSON response");
111
112 if use_full_content {
113 crate::debug_log!("Jina: Parsing direct engine JSON response");
115 let direct_response: JinaDirectResponse = serde_json::from_str(&response_text)
116 .map_err(|e| {
117 anyhow::anyhow!("Failed to parse Jina direct JSON response: {}", e)
118 })?;
119
120 for (index, result) in direct_response.data.iter().enumerate() {
121 if index >= max_results {
122 break;
123 }
124
125 let search_result = SearchResult {
126 title: result.title.clone(),
127 url: result.url.clone(),
128 snippet: if !result.content.is_empty() {
129 result.content.clone()
131 } else if !result.description.is_empty() {
132 result.description.clone()
133 } else {
134 "No content available".to_string()
135 },
136 published_date: None,
137 author: None,
138 score: None,
139 };
140
141 results.push(search_result);
142 }
143 } else {
144 let jina_results: Vec<JinaSearchResult> = serde_json::from_str(&response_text)
146 .map_err(|e| anyhow::anyhow!("Failed to parse Jina JSON response: {}", e))?;
147
148 for (index, result) in jina_results.iter().enumerate() {
149 if index >= max_results {
150 break;
151 }
152
153 let search_result = SearchResult {
154 title: result.title.clone(),
155 url: result.url.clone(),
156 snippet: if !result.description.is_empty() {
157 result.description.clone()
158 } else {
159 result.content.clone()
160 },
161 published_date: None,
162 author: None,
163 score: None,
164 };
165
166 results.push(search_result);
167 }
168 }
169 } else {
170 crate::debug_log!("Jina: Parsing text response");
172 let lines: Vec<&str> = response_text.lines().collect();
173 let mut current_result: Option<(String, String, String)> = None; for line in lines {
176 let line = line.trim();
177 if line.is_empty() {
178 continue;
179 }
180
181 if let Some(title_match) = line.strip_prefix("[") {
183 if let Some(end_bracket) = title_match.find("] Title: ") {
184 let title = title_match[end_bracket + 9..].to_string();
185 if let Some((prev_title, prev_url, prev_desc)) = current_result.take() {
186 if !prev_title.is_empty()
188 && !prev_url.is_empty()
189 && results.len() < max_results
190 {
191 results.push(SearchResult {
192 title: prev_title,
193 url: prev_url,
194 snippet: prev_desc,
195 published_date: None,
196 author: None,
197 score: None,
198 });
199 }
200 }
201 current_result = Some((title, String::new(), String::new()));
202 continue;
203 }
204 }
205
206 if let Some(url_match) = line.strip_prefix("[") {
208 if let Some(end_bracket) = url_match.find("] URL Source: ") {
209 let url = url_match[end_bracket + 13..].to_string();
210 if let Some((title, _, desc)) = current_result.take() {
211 current_result = Some((title, url, desc));
212 }
213 continue;
214 }
215 }
216
217 if let Some(desc_match) = line.strip_prefix("[") {
219 if let Some(end_bracket) = desc_match.find("] Description: ") {
220 let description = desc_match[end_bracket + 15..].to_string();
221 if let Some((title, url, _)) = current_result.take() {
222 current_result = Some((title, url, description));
223 }
224 continue;
225 }
226 }
227 }
228
229 if let Some((title, url, desc)) = current_result {
231 if !title.is_empty() && !url.is_empty() && results.len() < max_results {
232 results.push(SearchResult {
233 title,
234 url,
235 snippet: desc,
236 published_date: None,
237 author: None,
238 score: None,
239 });
240 }
241 }
242 }
243
244 crate::debug_log!("Jina: Successfully extracted {} results", results.len());
245
246 Ok(SearchResults {
247 query: query.to_string(),
248 provider: "Jina".to_string(),
249 results,
250 total_results: None, search_time_ms: None, })
253 }
254}
255
256pub async fn search(
258 provider_config: &super::SearchProviderConfig,
259 query: &str,
260 count: Option<usize>,
261) -> anyhow::Result<super::SearchResults> {
262 let provider = JinaProvider::new(provider_config.url.clone(), provider_config.headers.clone());
263
264 provider.search(query, count).await
265}