use serde::{Deserialize, Serialize};
use std::path::PathBuf;
/// Search settings: a timeout, optional health settings, a map of per-engine
/// settings, and optional headless-browser settings.
///
/// Fields are (de)serialized under camelCase keys unless renamed below.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchConfig {
/// Search timeout; `default_search_timeout()` supplies the value when the
/// key is missing.
/// NOTE(review): the unit is not stated in this file (presumably seconds) —
/// confirm against the consumer.
#[serde(default = "default_search_timeout")]
pub timeout: u64,
/// Optional [`SearchHealthConfig`]; left out of serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub health: Option<SearchHealthConfig>,
/// Per-engine settings keyed by a string (presumably the engine name —
/// confirm). The serialized key is the singular `engine`; a missing key
/// yields an empty map.
#[serde(default, rename = "engine")]
pub engines: std::collections::HashMap<String, SearchEngineConfig>,
/// Optional [`HeadlessConfig`]; left out of serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub headless: Option<HeadlessConfig>,
}
/// Browser backend selector stored in [`HeadlessConfig::backend`].
///
/// Variants are (de)serialized as lowercase strings (`"chrome"`,
/// `"lightpanda"`). `Chrome` is the `Default` variant.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum BrowserBackend {
/// Default backend, used when the `backend` key is missing.
#[default]
Chrome,
/// Alternative backend; see [`BrowserBackend::is_lightpanda`].
Lightpanda,
}
/// Headless-browser settings: which backend to use, a tab limit, an optional
/// browser path, extra launch arguments, and an optional proxy URL.
///
/// Fields are (de)serialized under camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct HeadlessConfig {
/// Backend selector; falls back to `BrowserBackend::default()` when missing.
#[serde(default)]
pub backend: BrowserBackend,
/// Tab limit; `default_headless_max_tabs()` supplies the value when missing.
#[serde(default = "default_headless_max_tabs")]
pub max_tabs: usize,
/// Optional browser path, serialized as `browserPath` and omitted when
/// `None`. On input the keys `chromePath`, `lightpandaPath`, `obscuraPath`
/// and `playwrightPath` are accepted as aliases for the same field.
#[serde(
default,
alias = "chromePath",
alias = "lightpandaPath",
alias = "obscuraPath",
alias = "playwrightPath",
skip_serializing_if = "Option::is_none"
)]
pub browser_path: Option<String>,
/// Extra launch arguments; omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub launch_args: Vec<String>,
/// Optional proxy URL; omitted from serialized output when `None`.
/// NOTE(review): the derived `Debug` prints this value verbatim — if the URL
/// can embed credentials, consider redacting it in logs.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub proxy_url: Option<String>,
}
impl BrowserBackend {
    /// Returns `true` only for the [`BrowserBackend::Lightpanda`] variant.
    pub fn is_lightpanda(self) -> bool {
        // Spelled out per variant so that adding a backend forces an explicit
        // decision here instead of silently answering `false`.
        match self {
            Self::Lightpanda => true,
            Self::Chrome => false,
        }
    }
}
impl Default for HeadlessConfig {
fn default() -> Self {
Self {
backend: BrowserBackend::Chrome,
max_tabs: 4,
browser_path: None,
launch_args: Vec::new(),
proxy_url: None,
}
}
}
/// Document-parser settings: an on/off switch, a file-size limit, and optional
/// OCR and cache sections.
///
/// Fields are (de)serialized under camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DocumentParserConfig {
/// On/off switch; `default_enabled()` (true) applies when the key is missing.
#[serde(default = "default_enabled")]
pub enabled: bool,
/// File-size limit; `default_document_parser_max_file_size_mb()` applies when
/// the key is missing, and `normalized()` clamps it to `1..=1024`.
/// NOTE(review): megabytes per the field name — confirm how it is enforced.
#[serde(default = "default_document_parser_max_file_size_mb")]
pub max_file_size_mb: u64,
/// Optional [`DocumentOcrConfig`]; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ocr: Option<DocumentOcrConfig>,
/// Optional [`DocumentCacheConfig`]; omitted from serialized output when `None`.
/// NOTE(review): a missing key deserializes to `None`, while the `Default`
/// impl of this struct yields `Some(DocumentCacheConfig::default())` —
/// confirm the difference is intended.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache: Option<DocumentCacheConfig>,
}
impl Default for DocumentParserConfig {
fn default() -> Self {
Self {
enabled: true,
max_file_size_mb: default_document_parser_max_file_size_mb(),
ocr: None,
cache: Some(DocumentCacheConfig::default()),
}
}
}
impl DocumentParserConfig {
pub fn normalized(&self) -> Self {
Self {
enabled: self.enabled,
max_file_size_mb: self.max_file_size_mb.clamp(1, 1024),
ocr: self.ocr.as_ref().map(DocumentOcrConfig::normalized),
cache: self.cache.as_ref().map(DocumentCacheConfig::normalized),
}
}
}
/// Document-cache settings: an on/off switch and an optional directory.
///
/// Fields are (de)serialized under camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DocumentCacheConfig {
/// On/off switch; `default_enabled()` (true) applies when the key is missing.
#[serde(default = "default_enabled")]
pub enabled: bool,
/// Optional directory path; omitted from serialized output when `None`.
/// NOTE(review): what location is used when this is `None` is decided
/// outside this file — confirm with the consumer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub directory: Option<PathBuf>,
}
impl Default for DocumentCacheConfig {
fn default() -> Self {
Self {
enabled: true,
directory: None,
}
}
}
impl DocumentCacheConfig {
    /// Returns a copy of this section. No field is adjusted here; the method
    /// has the same shape as the other `normalized()` helpers in this file.
    pub fn normalized(&self) -> Self {
        // Both fields are carried over verbatim, so a plain clone is the
        // whole normalization.
        self.clone()
    }
}
/// OCR settings for document parsing: an on/off switch, optional model and
/// prompt, image-count and DPI limits, and optional provider connection
/// details.
///
/// Fields are (de)serialized under camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct DocumentOcrConfig {
/// On/off switch; `default_enabled()` (true) applies when the key is missing.
/// NOTE(review): the `Default` impl of this struct sets `enabled: false`, so
/// a deserialized section and `DocumentOcrConfig::default()` disagree —
/// confirm this is intended.
#[serde(default = "default_enabled")]
pub enabled: bool,
/// Optional model identifier; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model: Option<String>,
/// Optional prompt text; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub prompt: Option<String>,
/// Image-count limit; `default_document_ocr_max_images()` applies when the
/// key is missing, and `normalized()` clamps it to `1..=64`.
#[serde(default = "default_document_ocr_max_images")]
pub max_images: usize,
/// DPI value; `default_document_ocr_dpi()` applies when the key is missing,
/// and `normalized()` clamps it to `72..=600`.
#[serde(default = "default_document_ocr_dpi")]
pub dpi: u32,
/// Optional provider name; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub provider: Option<String>,
/// Optional base URL; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub base_url: Option<String>,
/// Optional API key; omitted from serialized output when `None`.
/// NOTE(review): the derived `Debug` prints this value verbatim — consider
/// redacting it before this struct is logged.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub api_key: Option<String>,
}
impl Default for DocumentOcrConfig {
fn default() -> Self {
Self {
enabled: false,
model: None,
prompt: None,
max_images: default_document_ocr_max_images(),
dpi: default_document_ocr_dpi(),
provider: None,
base_url: None,
api_key: None,
}
}
}
impl DocumentOcrConfig {
    /// Returns a copy with `max_images` clamped to `1..=64` and `dpi` clamped
    /// to `72..=600`; every other field is cloned unchanged.
    pub fn normalized(&self) -> Self {
        const MIN_IMAGES: usize = 1;
        const MAX_IMAGES: usize = 64;
        const MIN_DPI: u32 = 72;
        const MAX_DPI: u32 = 600;

        Self {
            max_images: self.max_images.clamp(MIN_IMAGES, MAX_IMAGES),
            dpi: self.dpi.clamp(MIN_DPI, MAX_DPI),
            // Remaining fields are taken verbatim from a clone of `self`.
            ..self.clone()
        }
    }
}
/// Health settings referenced from [`SearchConfig::health`]: a failure
/// threshold and a suspension duration.
///
/// Fields are (de)serialized under camelCase keys.
/// NOTE(review): how the two values interact (presumably an engine is
/// suspended after `max_failures` failures) is decided outside this file —
/// confirm with the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchHealthConfig {
/// Failure threshold; `default_max_failures()` applies when the key is missing.
#[serde(default = "default_max_failures")]
pub max_failures: u32,
/// Suspension duration, in seconds per the field name;
/// `default_suspend_seconds()` applies when the key is missing.
#[serde(default = "default_suspend_seconds")]
pub suspend_seconds: u64,
}
/// Settings for one entry of [`SearchConfig::engines`]: an on/off switch, a
/// weight, and an optional timeout.
///
/// Fields are (de)serialized under camelCase keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchEngineConfig {
/// On/off switch; `default_enabled()` (true) applies when the key is missing.
#[serde(default = "default_enabled")]
pub enabled: bool,
/// Engine weight; `default_weight()` (1.0) applies when the key is missing.
/// NOTE(review): no range check is applied in this file, so negative or NaN
/// values pass through — confirm the consumer tolerates them.
#[serde(default = "default_weight")]
pub weight: f64,
/// Optional timeout; omitted from serialized output when `None`.
/// NOTE(review): presumably overrides `SearchConfig::timeout` for this
/// engine — confirm with the consumer.
#[serde(skip_serializing_if = "Option::is_none")]
pub timeout: Option<u64>,
}
/// Serde fallback for `SearchConfig::timeout`; returns `10`.
pub(crate) fn default_search_timeout() -> u64 {
    const SEARCH_TIMEOUT: u64 = 10;
    SEARCH_TIMEOUT
}
/// Serde fallback for `HeadlessConfig::max_tabs`; returns `4`.
pub(crate) fn default_headless_max_tabs() -> usize {
    const HEADLESS_MAX_TABS: usize = 4;
    HEADLESS_MAX_TABS
}
/// Serde fallback for `SearchHealthConfig::max_failures`; returns `3`.
fn default_max_failures() -> u32 {
    const MAX_FAILURES: u32 = 3;
    MAX_FAILURES
}
/// Serde fallback for `SearchHealthConfig::suspend_seconds`; returns `60`.
fn default_suspend_seconds() -> u64 {
    const SUSPEND_SECONDS: u64 = 60;
    SUSPEND_SECONDS
}
/// Serde fallback shared by the `enabled` fields of `SearchEngineConfig`,
/// `DocumentParserConfig`, `DocumentCacheConfig` and `DocumentOcrConfig`;
/// returns `true`.
pub(crate) fn default_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
/// Serde fallback for `SearchEngineConfig::weight`; returns `1.0`.
fn default_weight() -> f64 {
    const WEIGHT: f64 = 1.0;
    WEIGHT
}
/// Serde fallback for `DocumentParserConfig::max_file_size_mb`, also used by
/// that struct's `Default` impl; returns `50`.
pub(crate) fn default_document_parser_max_file_size_mb() -> u64 {
    const MAX_FILE_SIZE_MB: u64 = 50;
    MAX_FILE_SIZE_MB
}
/// Serde fallback for `DocumentOcrConfig::max_images`, also used by that
/// struct's `Default` impl; returns `8`.
pub(crate) fn default_document_ocr_max_images() -> usize {
    const OCR_MAX_IMAGES: usize = 8;
    OCR_MAX_IMAGES
}
/// Serde fallback for `DocumentOcrConfig::dpi`, also used by that struct's
/// `Default` impl; returns `144`.
pub(crate) fn default_document_ocr_dpi() -> u32 {
    const OCR_DPI: u32 = 144;
    OCR_DPI
}