1use serde::{Deserialize, Serialize};
2use std::path::PathBuf;
3
4#[derive(Debug, Clone, Serialize, Deserialize)]
10#[serde(rename_all = "camelCase")]
11pub struct SearchConfig {
12 #[serde(default = "default_search_timeout")]
14 pub timeout: u64,
15
16 #[serde(default, skip_serializing_if = "Option::is_none")]
18 pub health: Option<SearchHealthConfig>,
19
20 #[serde(default, rename = "engine")]
22 pub engines: std::collections::HashMap<String, SearchEngineConfig>,
23
24 #[serde(default, skip_serializing_if = "Option::is_none")]
27 pub headless: Option<HeadlessConfig>,
28}
29
30#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
32#[serde(rename_all = "lowercase")]
33pub enum BrowserBackend {
34 #[default]
36 Chrome,
37 Lightpanda,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
44#[serde(rename_all = "camelCase")]
45pub struct HeadlessConfig {
46 #[serde(default)]
48 pub backend: BrowserBackend,
49
50 #[serde(default = "default_headless_max_tabs")]
52 pub max_tabs: usize,
53
54 #[serde(
56 default,
57 alias = "chromePath",
58 alias = "lightpandaPath",
59 alias = "obscuraPath",
60 alias = "playwrightPath",
61 skip_serializing_if = "Option::is_none"
62 )]
63 pub browser_path: Option<String>,
64
65 #[serde(default, skip_serializing_if = "Vec::is_empty")]
67 pub launch_args: Vec<String>,
68
69 #[serde(default, skip_serializing_if = "Option::is_none")]
71 pub proxy_url: Option<String>,
72}
73
74impl BrowserBackend {
75 pub fn is_lightpanda(self) -> bool {
76 matches!(self, Self::Lightpanda)
77 }
78}
79
80impl Default for HeadlessConfig {
81 fn default() -> Self {
82 Self {
83 backend: BrowserBackend::Chrome,
84 max_tabs: 4,
85 browser_path: None,
86 launch_args: Vec::new(),
87 proxy_url: None,
88 }
89 }
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize)]
94#[serde(rename_all = "camelCase")]
95pub struct DocumentParserConfig {
96 #[serde(default = "default_enabled")]
98 pub enabled: bool,
99
100 #[serde(default = "default_document_parser_max_file_size_mb")]
102 pub max_file_size_mb: u64,
103
104 #[serde(default, skip_serializing_if = "Option::is_none")]
110 pub ocr: Option<DocumentOcrConfig>,
111
112 #[serde(default, skip_serializing_if = "Option::is_none")]
114 pub cache: Option<DocumentCacheConfig>,
115}
116
117impl Default for DocumentParserConfig {
118 fn default() -> Self {
119 Self {
120 enabled: true,
121 max_file_size_mb: default_document_parser_max_file_size_mb(),
122 ocr: None,
123 cache: Some(DocumentCacheConfig::default()),
124 }
125 }
126}
127
128impl DocumentParserConfig {
129 pub fn normalized(&self) -> Self {
130 Self {
131 enabled: self.enabled,
132 max_file_size_mb: self.max_file_size_mb.clamp(1, 1024),
133 ocr: self.ocr.as_ref().map(DocumentOcrConfig::normalized),
134 cache: self.cache.as_ref().map(DocumentCacheConfig::normalized),
135 }
136 }
137}
138
139#[derive(Debug, Clone, Serialize, Deserialize)]
140#[serde(rename_all = "camelCase")]
141pub struct DocumentCacheConfig {
142 #[serde(default = "default_enabled")]
143 pub enabled: bool,
144
145 #[serde(default, skip_serializing_if = "Option::is_none")]
146 pub directory: Option<PathBuf>,
147}
148
149impl Default for DocumentCacheConfig {
150 fn default() -> Self {
151 Self {
152 enabled: true,
153 directory: None,
154 }
155 }
156}
157
158impl DocumentCacheConfig {
159 pub fn normalized(&self) -> Self {
160 Self {
161 enabled: self.enabled,
162 directory: self.directory.clone(),
163 }
164 }
165}
166
167#[derive(Debug, Clone, Serialize, Deserialize)]
169#[serde(rename_all = "camelCase")]
170pub struct DocumentOcrConfig {
171 #[serde(default = "default_enabled")]
173 pub enabled: bool,
174
175 #[serde(default, skip_serializing_if = "Option::is_none")]
177 pub model: Option<String>,
178
179 #[serde(default, skip_serializing_if = "Option::is_none")]
181 pub prompt: Option<String>,
182
183 #[serde(default = "default_document_ocr_max_images")]
185 pub max_images: usize,
186
187 #[serde(default = "default_document_ocr_dpi")]
189 pub dpi: u32,
190
191 #[serde(default, skip_serializing_if = "Option::is_none")]
195 pub provider: Option<String>,
196
197 #[serde(default, skip_serializing_if = "Option::is_none")]
199 pub base_url: Option<String>,
200
201 #[serde(default, skip_serializing_if = "Option::is_none")]
203 pub api_key: Option<String>,
204}
205
206impl Default for DocumentOcrConfig {
207 fn default() -> Self {
208 Self {
209 enabled: false,
210 model: None,
211 prompt: None,
212 max_images: default_document_ocr_max_images(),
213 dpi: default_document_ocr_dpi(),
214 provider: None,
215 base_url: None,
216 api_key: None,
217 }
218 }
219}
220
221impl DocumentOcrConfig {
222 pub fn normalized(&self) -> Self {
223 Self {
224 enabled: self.enabled,
225 model: self.model.clone(),
226 prompt: self.prompt.clone(),
227 max_images: self.max_images.clamp(1, 64),
228 dpi: self.dpi.clamp(72, 600),
229 provider: self.provider.clone(),
230 base_url: self.base_url.clone(),
231 api_key: self.api_key.clone(),
232 }
233 }
234}
235
236#[derive(Debug, Clone, Serialize, Deserialize)]
238#[serde(rename_all = "camelCase")]
239pub struct SearchHealthConfig {
240 #[serde(default = "default_max_failures")]
242 pub max_failures: u32,
243
244 #[serde(default = "default_suspend_seconds")]
246 pub suspend_seconds: u64,
247}
248
249#[derive(Debug, Clone, Serialize, Deserialize)]
251#[serde(rename_all = "camelCase")]
252pub struct SearchEngineConfig {
253 #[serde(default = "default_enabled")]
255 pub enabled: bool,
256
257 #[serde(default = "default_weight")]
259 pub weight: f64,
260
261 #[serde(skip_serializing_if = "Option::is_none")]
263 pub timeout: Option<u64>,
264}
265
/// Serde fallback for `SearchConfig::timeout`; returns `10`.
pub(crate) fn default_search_timeout() -> u64 {
    const FALLBACK: u64 = 10;
    FALLBACK
}
269
/// Serde fallback for `HeadlessConfig::max_tabs`; returns `4`.
pub(crate) fn default_headless_max_tabs() -> usize {
    const FALLBACK: usize = 4;
    FALLBACK
}
273
/// Serde fallback for `SearchHealthConfig::max_failures`; returns `3`.
fn default_max_failures() -> u32 {
    const FALLBACK: u32 = 3;
    FALLBACK
}
277
/// Serde fallback for `SearchHealthConfig::suspend_seconds`; returns `60`.
fn default_suspend_seconds() -> u64 {
    const FALLBACK: u64 = 60;
    FALLBACK
}
281
/// Serde fallback shared by the `enabled` fields of `DocumentParserConfig`,
/// `DocumentCacheConfig`, `DocumentOcrConfig` and `SearchEngineConfig`;
/// returns `true`.
pub(crate) fn default_enabled() -> bool {
    const FALLBACK: bool = true;
    FALLBACK
}
285
/// Serde fallback for `SearchEngineConfig::weight`; returns `1.0`.
fn default_weight() -> f64 {
    const FALLBACK: f64 = 1.0;
    FALLBACK
}
289
/// Fallback for `DocumentParserConfig::max_file_size_mb`, used both as the
/// field's serde default and by `DocumentParserConfig::default()`; returns `50`.
pub(crate) fn default_document_parser_max_file_size_mb() -> u64 {
    const FALLBACK: u64 = 50;
    FALLBACK
}
293
/// Fallback for `DocumentOcrConfig::max_images`, used both as the field's serde
/// default and by `DocumentOcrConfig::default()`; returns `8`.
pub(crate) fn default_document_ocr_max_images() -> usize {
    const FALLBACK: usize = 8;
    FALLBACK
}
297
/// Fallback for `DocumentOcrConfig::dpi`, used both as the field's serde default
/// and by `DocumentOcrConfig::default()`; returns `144`.
pub(crate) fn default_document_ocr_dpi() -> u32 {
    const FALLBACK: u32 = 144;
    FALLBACK
}